/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/infermeta/ternary.h"

#include "glog/logging.h"

#include "paddle/common/ddim.h"
#include "paddle/common/errors.h"
#include "paddle/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/infermeta/binary.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
#include "paddle/phi/kernels/impl/box_coder.h"

namespace phi {
namespace detail {
// Used in MatrixRankAtolRtolInferMeta
static DDim CheckAndGetOutputDim(const DDim& dim_x) {
  auto x_vec = common::vectorize(dim_x);
  if (x_vec.size() == 2) {
    return common::make_ddim({});
  }
  x_vec.erase(x_vec.end() - 2, x_vec.end());
  return common::make_ddim(x_vec);
}
}  // namespace detail

void AccuracyInferMeta(const MetaTensor& out,
                       const MetaTensor& indice,
                       const MetaTensor& label,
                       MetaTensor* accuracy,
                       MetaTensor* correct,
                       MetaTensor* total,
                       MetaConfig config) {
  auto inference_dim = out.dims();
  auto label_dim = label.dims();
  // Assume indices has same shape as inference, because
  // it's the output of topk.
  PADDLE_ENFORCE_EQ(
      label_dim.size(),
      2,
      common::errors::InvalidArgument(
          "ShapeError: label's dimensions of AccuracyOp must be 2. "
          "But received label's dimensions = %d, label's shape = [%s]",
          label_dim.size(),
          label_dim));
  if (config.is_runtime) {
    PADDLE_ENFORCE_EQ(label_dim[1],
                      1,
                      common::errors::InvalidArgument(
                          "ShapeError: label's second dimension of "
                          "AccuracyOp must be 1. But received label's "
                          "second dimension is = %d, label's shape = [%s]",
                          label_dim[1],
                          label_dim));
    PADDLE_ENFORCE_EQ(
        inference_dim[0],
        label_dim[0],
        common::errors::InvalidArgument(
            "ShapeError: the output's num_rows of AccuracyOp must be"
            " the same as label's num_rows. But received output's "
            "shape = [%s], label's shape = [%s], output's num_rows = %d, "
            "label's "
            "num_rows = %d",
            inference_dim,
            label_dim,
            inference_dim[0],
            label_dim[0]));
  }

  accuracy->set_dims(common::make_ddim({}));
  correct->set_dims(common::make_ddim({}));
  total->set_dims(common::make_ddim({}));
  accuracy->set_dtype(out.dtype());
  correct->set_dtype(out.dtype());
  total->set_dtype(out.dtype());
  accuracy->share_lod(out);
}

void AddmmInferMeta(const MetaTensor& input,
                    const MetaTensor& x,
                    const MetaTensor& y,
                    float beta,
                    float alpha,
                    MetaTensor* out) {
  auto input_dims = input.dims();
  auto x_dims = x.dims();
  auto y_dims = y.dims();

  auto ndim_input = input_dims.size();
  auto ndim_x = x_dims.size();
  auto ndim_y = y_dims.size();

  VLOG(3) << "addmm operator input.shape=" << input_dims
          << " x.shape=" << x_dims << " y.shape=" << y_dims << " beta=" << beta
          << " alpha=" << alpha << " ndim_input=" << ndim_input
          << " ndim_x=" << ndim_x << " ndim_y=" << ndim_y;

  // dim check
  PADDLE_ENFORCE_EQ(ndim_input == 2 || ndim_input == 1,
                    true,
                    errors::InvalidArgument(
                        "The input tensor input's dimension must be 2 or 1. "
                        "But received input's dimension = [%d].",
                        ndim_input));
  PADDLE_ENFORCE_EQ(
      ndim_x,
      2,
      errors::InvalidArgument("The input tensor x's dimension must be 2. "
                              "But received x's dimension = [%d].",
                              ndim_x));
  PADDLE_ENFORCE_EQ(
      ndim_y,
      2,
      errors::InvalidArgument("The input tensor y's dimension must be 2. "
                              "But received y's dimension = [%d].",
                              ndim_y));

  std::vector<int64_t> output_dims;
  output_dims.push_back(x_dims[0]);
  output_dims.push_back(y_dims[1]);

  out->set_dims(common::make_ddim(output_dims));
  out->share_lod(input);
  out->set_dtype(input.dtype());
}

void BaddbmmInferMeta(const MetaTensor& input,
                      const MetaTensor& x,
                      const MetaTensor& y,
                      float beta,
                      float alpha,
                      MetaTensor* out) {
  auto input_dims = input.dims();
  auto x_dims = x.dims();
  auto y_dims = y.dims();

  auto ndim_input = input_dims.size();
  auto ndim_x = x_dims.size();
  auto ndim_y = y_dims.size();

  VLOG(3) << "baddbmm operator input.shape=" << input_dims
          << " x.shape=" << x_dims << " y.shape=" << y_dims << " beta=" << beta
          << " alpha=" << alpha << " ndim_input=" << ndim_input
          << " ndim_x=" << ndim_x << " ndim_y=" << ndim_y;

  PADDLE_ENFORCE_NE(
      product(input_dims),
      0,
      errors::PreconditionNotMet("The Input variable 'input' has not "
                                 "been initialized. You may need to confirm "
                                 "if you put exe.run(startup_program) "
                                 "after optimizer.minimize function."));

  PADDLE_ENFORCE_NE(
      product(x_dims),
      0,
      errors::PreconditionNotMet("The Input variable 'x' has not "
                                 "been initialized. You may need to confirm "
                                 "if you put exe.run(startup_program) "
                                 "after optimizer.minimize function."));

  PADDLE_ENFORCE_NE(
      product(y_dims),
      0,
      errors::PreconditionNotMet("The Input variable 'y' has not "
                                 "been initialized. You may need to confirm "
                                 "if you put exe.run(startup_program) "
                                 "after optimizer.minimize function."));
  // dim check
  PADDLE_ENFORCE_EQ(ndim_input == 3 || ndim_input == 2,
                    true,
                    errors::InvalidArgument(
                        "The input tensor input's dimension must be 3 or 2. "
                        "But received input's dimension = [%d].",
                        ndim_input));
  PADDLE_ENFORCE_EQ(
      ndim_x,
      3,
      errors::InvalidArgument("The input tensor x's dimension must be 3. "
                              "But received x's dimension = [%d].",
                              ndim_x));
  PADDLE_ENFORCE_EQ(
      ndim_y,
      3,
      errors::InvalidArgument("The input tensor y's dimension must be 3. "
                              "But received y's dimension = [%d].",
                              ndim_y));

  PADDLE_ENFORCE_EQ(
      x_dims[2],
      y_dims[1],
      errors::InvalidArgument("The second dimension of x must be equal to the "
                              "first dimension of y. "
                              "But received x's second dimension = [%d], y's "
                              "first dimension = [%d].",
                              x_dims[2],
                              y_dims[1]));

  std::vector<int64_t> output_dims;
  output_dims.push_back(x_dims[0]);
  output_dims.push_back(x_dims[1]);
  output_dims.push_back(y_dims[2]);

  out->set_dims(common::make_ddim(output_dims));
  out->share_lod(input);
  out->set_dtype(input.dtype());
}

void AffineChannelInferMeta(const MetaTensor& x,
                            const MetaTensor& scale,
                            const MetaTensor& bias,
                            const std::string& data_layout_in,
                            MetaTensor* out,
                            MetaConfig config) {
  const auto& x_dims = x.dims();
  const auto& scale_dims = scale.dims();
  const auto& b_dims = bias.dims();
  const phi::DataLayout data_layout =
      common::StringToDataLayout(data_layout_in);

  const int64_t C =
      (data_layout == phi::DataLayout::kNCHW ? x_dims[1]
                                             : x_dims[x_dims.size() - 1]);

  PADDLE_ENFORCE_EQ(scale_dims.size(),
                    1UL,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(Scale) must be 1,"
                        "But received the dimensions of Input(Scale) is [%d] ",
                        scale_dims.size()));
  PADDLE_ENFORCE_EQ(b_dims.size(),
                    1UL,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(Bias) must be 1,"
                        "But received the dimensions of Input(Bias) is [%d] ",
                        scale_dims.size()));
  if (config.is_runtime || scale_dims[0] > 0) {
    PADDLE_ENFORCE_EQ(
        scale_dims[0],
        C,
        common::errors::InvalidArgument(
            "The first dimension value of Input(Scale) must be [%d],"
            "But received [%d].",
            C,
            scale_dims[0]));
  }
  if (config.is_runtime || b_dims[0] > 0) {
    PADDLE_ENFORCE_EQ(
        b_dims[0],
        C,
        common::errors::InvalidArgument(
            "The first dimension value of Input(Bias) must be [%d],"
            "But received [%d].",
            C,
            b_dims[0]));
  }

  out->set_dims(x.dims());
  out->share_lod(x);
  out->set_dtype(x.dtype());
}

void AssignPosInferMeta(const MetaTensor& x,
                        const MetaTensor& cum_count,
                        const MetaTensor& eff_num_len,
                        MetaTensor* out) {
  phi::DataType X_dtype = x.dtype();
  phi::DataType cum_count_dtype = cum_count.dtype();

  PADDLE_ENFORCE_EQ(cum_count_dtype,
                    X_dtype,
                    common::errors::InvalidArgument(
                        "The dtype of the cum_count and X should be same"));
  PADDLE_ENFORCE_EQ(cum_count_dtype,
                    phi::DataType::INT64,
                    common::errors::InvalidArgument(
                        "The dtype of the cum_count_dtype, eff_num_len and "
                        "X should be same as int64"));
  out->set_dtype(X_dtype);
}

void BatchFCInferMeta(const MetaTensor& input,
                      const MetaTensor& w,
                      const MetaTensor& bias,
                      MetaTensor* out) {
  auto input_dims = input.dims();
  auto w_dims = w.dims();

  PADDLE_ENFORCE_EQ(
      input_dims.size(),
      3,
      common::errors::InvalidArgument("Input of BatchFCOp should have 3D."));
  PADDLE_ENFORCE_EQ(
      w_dims.size(),
      3,
      common::errors::InvalidArgument("W of BatchFCOp should have 3D."));
  PADDLE_ENFORCE_EQ(
      input_dims[0],
      w_dims[0],
      common::errors::InvalidArgument(
          "Input.dim[0] and W.dim[0] of BatchFCOp should be same."));
  PADDLE_ENFORCE_EQ(
      input_dims[2],
      w_dims[1],
      common::errors::InvalidArgument(
          "Input.dim[2] and W.dim[1] of BatchFCOp should be same."));

  auto bias_dims = bias.dims();
  PADDLE_ENFORCE_EQ(bias_dims[0],
                    input_dims[0],
                    common::errors::InvalidArgument(
                        "Bias.dim[0] should be same as input.dim[0]."));
  PADDLE_ENFORCE_EQ(bias_dims[1],
                    w_dims[2],
                    common::errors::InvalidArgument(
                        "Bias.dim[1] should be same as input.dim[2]."));

  out->set_dims({input_dims[0], input_dims[1], w_dims[2]});
  out->share_lod(input);
  out->set_dtype(input.dtype());
}

void BoxCoderInferMeta(const MetaTensor& prior_box,
                       const MetaTensor& prior_box_var,
                       const MetaTensor& target_box,
                       const std::string& code_type,
                       bool box_normalized,
                       int axis,
                       const std::vector<float>& variance,
                       MetaTensor* output_box,
                       MetaConfig config) {
  auto prior_box_dims = prior_box.dims();
  auto target_box_dims = target_box.dims();

  if (config.is_runtime) {
    PADDLE_ENFORCE_EQ(prior_box_dims.size(),
                      2,
                      common::errors::InvalidArgument(
                          "The rank of Input PriorBox in BoxCoder operator "
                          "must be 2. But received rank = %d",
                          prior_box_dims.size()));
    PADDLE_ENFORCE_EQ(prior_box_dims[1],
                      4,
                      common::errors::InvalidArgument(
                          "The second dimension of PriorBox in BoxCoder "
                          "operator must be 4. But received dimension = %d",
                          prior_box_dims[1]));
    if (prior_box_var) {
      auto prior_box_var_dims = prior_box_var.dims();
      PADDLE_ENFORCE_EQ(
          prior_box_var_dims.size(),
          2,
          common::errors::InvalidArgument(
              "The rank of Input(PriorBoxVar) in BoxCoder operator"
              " should be 2. But received rank = %d",
              prior_box_var_dims.size()));
      PADDLE_ENFORCE_EQ(
          prior_box_dims,
          prior_box_var_dims,
          common::errors::InvalidArgument(
              "The dimension of Input(PriorBoxVar) should be equal to "
              "the dimension of Input(PriorBox) in BoxCoder operator "
              "when the rank is 2."));
    }
  }

  auto box_code_type = phi::funcs::GetBoxCodeType(code_type);
  if (box_code_type == phi::funcs::BoxCodeType::kEncodeCenterSize) {
    PADDLE_ENFORCE_EQ(target_box_dims.size(),
                      2,
                      common::errors::InvalidArgument(
                          "The rank of Input TargetBox in BoxCoder operator "
                          "must be 2. But received rank is %d",
                          target_box_dims.size()));
    PADDLE_ENFORCE_EQ(target_box_dims[1],
                      4,
                      common::errors::InvalidArgument(
                          "The second dimension of TargetBox in BoxCoder "
                          "operator is 4. But received dimension is %d",
                          target_box_dims[1]));
    output_box->set_dims({target_box_dims[0], prior_box_dims[0], 4});
  } else if (box_code_type == phi::funcs::BoxCodeType::kDecodeCenterSize) {
    PADDLE_ENFORCE_EQ(target_box_dims.size(),
                      3,
                      common::errors::InvalidArgument(
                          "The rank of Input TargetBox in BoxCoder "
                          "operator must be 3. But received rank is %d",
                          target_box_dims.size()));
    PADDLE_ENFORCE_EQ(axis == 0 || axis == 1,
                      true,
                      common::errors::InvalidArgument(
                          "axis in BoxCoder operator must be 0 or 1."
                          "But received axis = %d",
                          axis));
    if (config.is_runtime) {
      if (axis == 0) {
        PADDLE_ENFORCE_EQ(
            target_box_dims[1],
            prior_box_dims[0],
            common::errors::InvalidArgument(
                "When axis is 0, The second "
                "dimension of TargetBox in BoxCoder "
                "should be equal to the first dimension of PriorBox."));
      } else if (axis == 1) {
        PADDLE_ENFORCE_EQ(
            target_box_dims[0],
            prior_box_dims[0],
            common::errors::InvalidArgument(
                "When axis is 1, The first "
                "dimension of TargetBox in BoxCoder "
                "should be equal to the first dimension of PriorBox."));
      }
      PADDLE_ENFORCE_EQ(
          target_box_dims[2],
          prior_box_dims[1],
          common::errors::InvalidArgument("The third dimension of TargetBox"
                                          " in BoxCoder should be equal to the "
                                          "second dimension of PriorBox."));
    }
    output_box->share_dims(target_box);
  }

  if (box_code_type == phi::funcs::BoxCodeType::kDecodeCenterSize &&
      axis == 1) {
    output_box->share_lod(prior_box);
  } else {
    output_box->share_lod(target_box);
  }
  output_box->set_dtype(target_box.dtype());
}

void CrossEntropyWithSoftmaxBwdWithDowncastInferMeta(
    const MetaTensor& label,
    const MetaTensor& softmax,
    const MetaTensor& loss_grad,
    MetaTensor* logits_grad) {
  int axis = -1;
  auto softmax_dims = softmax.dims();
  auto labels_dims = label.dims();
  auto softmax_rank = softmax_dims.size();
  PADDLE_ENFORCE_EQ(
      axis,
      -1,
      common::errors::InvalidArgument("Attr(axis) value should be -1"));
  PADDLE_ENFORCE_EQ(
      softmax.dtype(),
      phi::DataType::FLOAT32,
      common::errors::InvalidArgument("softmax dtype should be float32"));

  axis = phi::funcs::CanonicalAxis(axis, softmax_rank);
  for (int i = 0; i < softmax_rank; i++) {
    if (i != axis) {
      PADDLE_ENFORCE_EQ(
          softmax_dims[i],
          labels_dims[i],
          common::errors::InvalidArgument(
              "Input(Logits) and Input(Label) should in same shape in "
              "dimensions except axis."));
    }
  }

  PADDLE_ENFORCE_EQ(
      labels_dims[axis],
      1UL,
      common::errors::InvalidArgument("If Attr(soft_label) == false, "
                                      "the axis dimension of "
                                      "Input(Label) should be 1."));

  logits_grad->set_dims(softmax.dims());
  logits_grad->set_dtype(phi::DataType::BFLOAT16);
}

void CSoftmaxWithMultiLabelCrossEntropyInferMeta(
    const MetaTensor& logits,
    const MetaTensor& label,
    const MetaTensor& smooth_weight,
    int64_t ignore_index,
    bool sum_multi_label_loss,
    int rank,
    int nranks,
    MetaTensor* softmax,
    MetaTensor* loss,
    MetaConfig config) {
  auto logits_dims = logits.dims();
  auto labels_dims = label.dims();
  auto smooth_weight_dims = smooth_weight.dims();

  auto logits_rank = logits_dims.size();
  auto labels_rank = labels_dims.size();
  auto axis = logits_rank - 1;
  for (int i = 0; i < logits_rank; i++) {
    if (i != axis) {
      if (config.is_runtime || (logits_dims[i] > 0 && labels_dims[i] > 0)) {
        PADDLE_ENFORCE_EQ(logits_dims[i],
                          labels_dims[i],
                          common::errors::InvalidArgument(
                              "Input(Logits) and Input(Label) should in "
                              "same shape in dimensions except axis."));
      }
    }
  }

  PADDLE_ENFORCE_GE(
      labels_dims[logits_rank - 1],
      1UL,
      common::errors::InvalidArgument(
          "the last dimension of Input(Label) should be greater than or equal "
          "to 1."
          "But received: the last dimension of Input(Label) is [%d],"
          "the last dimension is [%d]",
          labels_dims[logits_rank - 1],
          logits_rank - 1));

  for (int i = 0; i < labels_rank; ++i) {
    if (config.is_runtime ||
        (labels_dims[i] > 0 && smooth_weight_dims[i] > 0)) {
      PADDLE_ENFORCE_EQ(labels_dims[i],
                        smooth_weight_dims[i],
                        common::errors::InvalidArgument(
                            "Input(Label) and Input(SmoothWeight) should in "
                            "same shape in dimensions"));
    }
  }

  softmax->set_dims(logits_dims);
  if (sum_multi_label_loss) {
    labels_dims[axis] = 1;
  }
  loss->set_dims(labels_dims);
  softmax->share_lod(logits);
  loss->share_lod(logits);
}

void DistributedPushSparseInferMeta(
    const std::vector<const MetaTensor*>& ids,
    const std::vector<const MetaTensor*>& shows,
    const std::vector<const MetaTensor*>& clicks,
    int table_id,
    int size,
    bool is_distributed,
    const std::string& push_sparse_version,
    int64_t padding_idx,
    DataType dtype,
    bool is_test,
    bool use_cvm_op,
    std::vector<MetaTensor*> output) {
  auto ids_size = ids.size();
  std::vector<DDim> ids_dims;
  ids_dims.reserve(ids.size());
  for (size_t i = 1; i < ids_size; ++i) {
    PADDLE_ENFORCE_EQ(ids_dims[i].size(),
                      2,
                      common::errors::InvalidArgument(
                          "The dimension of the 'Ids' tensor must be 2."));
  }

  for (auto& out : output) {
    if (out == nullptr) {
      continue;
    }
    out->set_dtype(ids[0]->dtype());
  }
}

void DpsgdInferMeta(const MetaTensor& param,
                    const MetaTensor& grad,
                    const MetaTensor& learning_rate,
                    float clip,
                    float batch_size,
                    float sigma,
                    int size,
                    MetaTensor* param_out) {
  auto lr_dims = learning_rate.dims();
  PADDLE_ENFORCE_EQ(common::product(lr_dims),
                    1,
                    common::errors::InvalidArgument(
                        "Learning rate should have 1 dimension. But Received "
                        "LearningRate's dims [%s].",
                        common::product(lr_dims)));
  auto param_dims = param.dims();
  PADDLE_ENFORCE_EQ(
      param_dims,
      grad.dims(),
      common::errors::InvalidArgument(
          "Param and Grad input of DpsgdOp should have same dimension. But "
          "received Para's dim [%s] and Grad's dim [%s].",
          param_dims,
          grad.dims()));
  param_out->set_dims(param_dims);
}

void FastLayerNormInfermeta(const MetaTensor& x,
                            const MetaTensor& scale,
                            const MetaTensor& bias,
                            float epsilon,
                            MetaTensor* y,
                            MetaTensor* mean,
                            MetaTensor* invvar) {
  auto x_dim = x.dims();
  auto x_ndim = x_dim.size();

  auto matrix_dim = common::flatten_to_2d(x_dim, x_ndim - 1);

  int64_t right = matrix_dim[1];
  if (scale) {
    PADDLE_ENFORCE_EQ(scale.dims().size(),
                      1,
                      common::errors::InvalidArgument(
                          "The dimensions of Input(Scale) must be 1, but "
                          "received dimensions of "
                          "Input(Scale) is [%d]",
                          scale.dims().size()));
  }

  PADDLE_ENFORCE_EQ(
      scale.dims()[0],
      right,
      common::errors::InvalidArgument(
          "The first dimension value of Input(Scale) must equal to be the "
          "second dimension value of the flattened 2D matrix of Input(X), "
          "But received the first dimension value of Input(Scale) is "
          "[%d], the second dimension value of the flattened 2D matrix of "
          " Input(Scale) is [%d].",
          scale.dims()[0],
          right));
  if (bias) {
    PADDLE_ENFORCE_EQ(bias.dims().size(),
                      1,
                      common::errors::InvalidArgument(
                          "The dimensions of Input(Bias) must be 1, but "
                          "received dimensions of "
                          "Input(Bias) is [%d]",
                          bias.dims().size()));
  }
  PADDLE_ENFORCE_EQ(
      bias.dims()[0],
      right,
      common::errors::InvalidArgument(
          "The first dimension value of Input(Bias) must equal to be the "
          "second dimension value of the flattened 2D matrix of Input(X), "
          "But received the first dimension value of Input(Bias) is "
          "[%d], the second dimension value of the flattened 2D matrix of "
          " Input(Bias) is [%d].",
          bias.dims()[0],
          right));

  PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f,
                    true,
                    common::errors::InvalidArgument(
                        "'epsilon' in Op(LayerNorm) should be between"
                        "0.0 and 0.001, But received [%s].",
                        epsilon));

  phi::DataType x_dtype = x.dtype();
  phi::DataType scale_dtype = scale.dtype();
  y->set_dims(x_dim);
  y->set_dtype(scale_dtype);

  auto row_shape = slice_ddim(x_dim, 0, x_dim.size() - 1);
  mean->set_dims({row_shape});
  mean->set_dtype(paddle::DataType::FLOAT32);
  invvar->set_dims({row_shape});
  invvar->set_dtype(paddle::DataType::FLOAT32);
}
void FakeQuantizeRangeAbsMaxInferMeta(const MetaTensor& x,
                                      const MetaTensor& in_scale,
                                      const MetaTensor& iter,
                                      int window_size,
                                      int bit_length,
                                      bool is_test,
                                      int round_type,
                                      MetaTensor* out,
                                      MetaTensor* out_scale,
                                      MetaTensor* out_scales) {
  PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16,
                    true,
                    common::errors::InvalidArgument(
                        "'bit_length' should be between 1 and 16, but "
                        "the received is %d",
                        bit_length));
  if (out_scales) {
    out_scales->set_dims({window_size});
  }
  out->set_dims(x.dims());
  out_scale->set_dims({1});
  out->share_lod(x);
}

void FlashAttnInferMeta(const MetaTensor& q,
                        const MetaTensor& k,
                        const MetaTensor& v,
                        MetaTensor* out,
                        MetaTensor* softmax,
                        MetaTensor* softmax_lse,
                        MetaTensor* seed_offset) {
  auto out_dims = q.dims();
  if (out_dims.size() == 4) {
    out_dims[3] = v.dims()[3];
  }
  out->set_dims(out_dims);
  out->set_dtype(q.dtype());
  out->set_layout(q.layout());
  softmax->set_dtype(q.dtype());
  softmax_lse->set_dtype(q.dtype());
  if (out_dims.size() == 4) {
    auto round_multiple = [](int64_t x) { return (x + 127) / 128 * 128; };
    int64_t batch_size = q.dims()[0];
    int64_t num_heads = q.dims()[2];
    int64_t seqlen_q_rounded = round_multiple(q.dims()[1]);
    int64_t seqlen_k_rounded = round_multiple(k.dims()[1]);
    if (softmax) {
      softmax->set_dims(
          {batch_size, num_heads, seqlen_q_rounded, seqlen_k_rounded});
    }
    if (softmax_lse) {
      softmax_lse->set_dims({batch_size, num_heads, seqlen_q_rounded});
    }
  }
  if (out_dims.size() == 3) {  // when use flash_attn_unpadded
    auto round_multiple = [](int64_t x) { return (x + 127) / 128 * 128; };
    int64_t batch_and_seq_size = q.dims()[0];
    int64_t num_heads = q.dims()[1];
    int64_t seqlen_q_rounded = round_multiple(batch_and_seq_size);
    int64_t seqlen_k_rounded = round_multiple(batch_and_seq_size);
    if (softmax) {
      softmax->set_dims({num_heads, seqlen_q_rounded, seqlen_k_rounded});
    }
    if (softmax_lse) {
      softmax_lse->set_dims({num_heads, seqlen_q_rounded});
    }
  }
  if (seed_offset) {
    seed_offset->set_dtype(phi::DataType::INT64);
    seed_offset->set_dims({2});
  }
}
void FlashAttnQKVPackedInferMeta(const MetaTensor& qkv,
                                 MetaTensor* out,
                                 MetaTensor* softmax,
                                 MetaTensor* softmax_lse,
                                 MetaTensor* seed_offset) {
  const auto& qkvdims = qkv.dims();
  PADDLE_ENFORCE(qkvdims.size() == 4 || qkvdims.size() == 5,
                 common::errors::InvalidArgument(
                     "qkv dims must be 4(unpadded) or 5(padded batch)"));
  // qkv [total_*,nheads/nheads_k+2,nheads_k,headdim]
  auto out_dims = DDim({qkvdims[0], (qkvdims[1] - 2) * qkvdims[2], qkvdims[3]});
  if (qkvdims.size() == 5) {
    // qkv [batchsize,seqlen,nheads/nheads_k+2,nheads_k,headdim]
    out_dims =
        DDim{qkvdims[0], qkvdims[1], (qkvdims[2] - 2) * qkvdims[3], qkvdims[4]};
  }
  out->set_dims(out_dims);
  out->set_dtype(qkv.dtype());
  out->set_layout(qkv.layout());
  softmax->set_dtype(qkv.dtype());
  softmax_lse->set_dtype(qkv.dtype());
  if (seed_offset) {
    seed_offset->set_dtype(phi::DataType::INT64);
  }
}

void CalcReducedAttnScoresInferMeta(const MetaTensor& q,
                                    const MetaTensor& k,
                                    const MetaTensor& softmax_lse,
                                    MetaTensor* reduced_scores) {
  PADDLE_ENFORCE(q.dims().size() == 4,
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q with dim "
                     "[batch_size, seq_len, num_heads, head_dim]"));

  PADDLE_ENFORCE(k.dims().size() == 4,
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input k with dim "
                     "[batch_size, seq_len, num_heads, head_dim]"));

  PADDLE_ENFORCE(
      softmax_lse.dims().size() == 3,
      common::errors::InvalidArgument(
          "calc_reduced_attn_scores must receive input softmax_lse with dim "
          "[batch_size, num_heads, seq_len_q]"));

  PADDLE_ENFORCE(q.dims()[0] == k.dims()[0],
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q and k "
                     "with consistent batch_size!"));

  PADDLE_ENFORCE(q.dims()[0] == softmax_lse.dims()[0],
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q and "
                     "softmax_lse with consistent batch_size!"));

  auto round_multiple = [](int64_t x, int64_t m) {
    return (x + m - 1) / m * m;
  };
  PADDLE_ENFORCE(round_multiple(q.dims()[1], 128) == softmax_lse.dims()[2],
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q and "
                     "softmax_lse with corresponding seq_len!"));

  PADDLE_ENFORCE(q.dims()[2] == softmax_lse.dims()[1],
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q and "
                     "softmax_lse with consistent num_heads!"));

  PADDLE_ENFORCE(q.dims()[3] == k.dims()[3],
                 common::errors::InvalidArgument(
                     "calc_reduced_attn_scores must receive input q and k "
                     "with consistent head_dim!"));

  int64_t batch_size = q.dims()[0];
  int64_t num_heads = q.dims()[2];
  int64_t seqlen_k = k.dims()[1];

  reduced_scores->set_dtype(phi::DataType::FLOAT32);
  reduced_scores->set_dims({batch_size, num_heads, 1, seqlen_k});
}

void FlashMaskV2InferMeta(const MetaTensor& q,
                          const MetaTensor& k,
                          const MetaTensor& v,
                          MetaTensor* out,
                          MetaTensor* softmax_lse) {
  const int64_t batch_size = q.dims()[0];
  const int64_t seqlen_q = q.dims()[1];
  const int64_t num_heads = q.dims()[q.dims().size() - 2];
  const int64_t head_size_v = v.dims()[v.dims().size() - 1];
  auto q_type = q.dtype();
  auto out_type =
      q_type == phi::DataType::FLOAT8_E4M3FN ? phi::DataType::BFLOAT16 : q_type;

  out->set_dims({batch_size, seqlen_q, num_heads, head_size_v});

  out->set_dtype(out_type);

  softmax_lse->set_dims({batch_size, num_heads, seqlen_q});
  softmax_lse->set_dtype(phi::DataType::FLOAT32);
}

void FlashAttnV3InferMeta(const MetaTensor& q,
                          const MetaTensor& k,
                          const MetaTensor& v,
                          MetaTensor* out,
                          MetaTensor* softmax_lse) {
  const int64_t batch_size = q.dims()[0];
  const int64_t seqlen_q = q.dims()[1];
  const int64_t num_heads = q.dims()[q.dims().size() - 2];
  const int64_t head_size_v = v.dims()[v.dims().size() - 1];
  auto q_type = q.dtype();
  auto out_type =
      q_type == phi::DataType::FLOAT8_E4M3FN ? phi::DataType::BFLOAT16 : q_type;

  out->set_dims({batch_size, seqlen_q, num_heads, head_size_v});

  out->set_dtype(out_type);

  softmax_lse->set_dims({batch_size, num_heads, seqlen_q});
  softmax_lse->set_dtype(phi::DataType::FLOAT32);
}

void FlashAttnV3VarlenInferMeta(const MetaTensor& q,
                                const MetaTensor& k,
                                const MetaTensor& v,
                                MetaTensor* out,
                                MetaTensor* softmax_lse) {
  const int64_t total_q = q.dims()[0];
  const int64_t num_heads = q.dims()[q.dims().size() - 2];
  const int64_t head_size_v = v.dims()[v.dims().size() - 1];
  auto q_type = q.dtype();
  auto out_type =
      q_type == phi::DataType::FLOAT8_E4M3FN ? phi::DataType::BFLOAT16 : q_type;
  out->set_dims({total_q, num_heads, head_size_v});
  softmax_lse->set_dims({num_heads, total_q});
  out->set_dtype(out_type);
  softmax_lse->set_dtype(phi::DataType::FLOAT32);
}

void ArangeTensorInferMeta(const MetaTensor& start,
                           const MetaTensor& end,
                           const MetaTensor& step,
                           MetaTensor* out) {
  PADDLE_ENFORCE_EQ(common::product(start.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(start) should be 1, but got %d",
                        common::product(start.dims())));

  PADDLE_ENFORCE_EQ(common::product(end.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(end) should be 1, but got %d",
                        common::product(end.dims())));

  PADDLE_ENFORCE_EQ(common::product(step.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(step) should be 1, but got %d",
                        common::product(step.dims())));

  out->set_dims({-1});
  out->set_dtype(start.dtype());
}

void RangeTensorInferMeta(const MetaTensor& start,
                          const MetaTensor& end,
                          const MetaTensor& step,
                          MetaTensor* out) {
  PADDLE_ENFORCE_EQ(common::product(start.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(start) should be 1, but got %d",
                        common::product(start.dims())));

  PADDLE_ENFORCE_EQ(common::product(end.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(end) should be 1, but got %d",
                        common::product(end.dims())));

  PADDLE_ENFORCE_EQ(common::product(step.dims()),
                    1,
                    common::errors::InvalidArgument(
                        "The numel of Input(step) should be 1, but got %d",
                        common::product(step.dims())));

  out->set_dims({-1});
  out->set_dtype(start.dtype());
}

void CollectFpnProposalsInferMeta(
    const std::vector<const MetaTensor*>& multi_level_rois,
    const std::vector<const MetaTensor*>& multi_level_scores,
    const paddle::optional<std::vector<const MetaTensor*>>&
        multi_level_rois_num,
    int post_nms_topn,
    MetaTensor* fpn_rois,
    MetaTensor* rois_num,
    MetaConfig config) {
  std::vector<int64_t> out_dims;
  for (auto& roi : multi_level_rois) {
    const auto& roi_dim = roi->dims();
    PADDLE_ENFORCE_EQ(
        roi_dim[1],
        4,
        common::errors::InvalidArgument(
            "Second dimension of Input"
            "(MultiLevelRois) must be 4. But received dimension = %d",
            roi_dim[1]));
  }
  for (auto& score : multi_level_scores) {
    const auto& score_dim = score->dims();
    PADDLE_ENFORCE_EQ(
        score_dim[1],
        1,
        common::errors::InvalidArgument(
            "Second dimension of Input"
            "(MultiLevelScores) must be 1. But received dimension = %d",
            score_dim[1]));
  }
  fpn_rois->set_dims({post_nms_topn, 4});
  fpn_rois->set_dtype(multi_level_rois[0]->dtype());
  if (rois_num != nullptr) {
    rois_num->set_dims({-1});
    rois_num->set_dtype(DataType::INT32);
  }
  if (!config.is_runtime) {  // Runtime LoD infershape will be computed
    // in Kernel.
    fpn_rois->share_lod(*multi_level_rois[0]);
  }
}

void InstanceNormInferMeta(const MetaTensor& x,
                           const MetaTensor& scale,
                           const MetaTensor& bias,
                           float epsilon,
                           MetaTensor* y,
                           MetaTensor* saved_mean,
                           MetaTensor* saved_variance,
                           MetaConfig config) {
  PADDLE_ENFORCE_NE(y,
                    nullptr,
                    common::errors::InvalidArgument(
                        "The y in InstanceNormInferMeta can't be nullptr."));
  const auto x_dims = x.dims();
  PADDLE_ENFORCE_GE(
      x_dims.size(),
      2,
      common::errors::InvalidArgument(
          "ShapeError: the dimension of input X must "
          "greater than or equal to 2. But received: the shape of input "
          "X = [%s], the dimension of input X =[%d]",
          x_dims,
          x_dims.size()));
  PADDLE_ENFORCE_LE(
      x_dims.size(),
      5,
      common::errors::InvalidArgument(
          "ShapeError: the dimension of input X must "
          "smaller than or equal to 5, But received: the shape of input "
          "X = [%s], the dimension of input X = [%d]",
          x_dims,
          x_dims.size()));
  auto N = x_dims[0];
  auto C = x_dims[1];
  auto NxC = N * C;
  if (scale) {
    auto scale_dim = scale.dims();
    PADDLE_ENFORCE_EQ(
        scale_dim.size(),
        1UL,
        common::errors::InvalidArgument(
            "ShapeError: the dimension of scale must equal to 1."
            "But received: the shape of scale is [%s], the dimension "
            "of scale is [%d]",
            scale_dim,
            scale_dim.size()));
    bool check = config.is_runtime || contain_unknown_dim(scale_dim);
    if (check) {
      if (C != 0) {
        PADDLE_ENFORCE_EQ(
            scale_dim[0],
            C,
            common::errors::InvalidArgument(
                "ShapeError: the shape of scale must equal to [%d]"
                "But received: the shape of scale is [%d]",
                C,
                scale_dim[0]));
      }
    }
  }
  if (bias) {
    auto bias_dim = bias.dims();
    PADDLE_ENFORCE_EQ(
        bias_dim.size(),
        1UL,
        common::errors::InvalidArgument(
            "ShapeError: the dimension of bias must equal to 1."
            "But received: the shape of bias is [%s],the dimension "
            "of bias is [%d]",
            bias_dim,
            bias_dim.size()));
    bool check = config.is_runtime || !contain_unknown_dim(bias_dim);
    if (check) {
      if (C != 0) {
        PADDLE_ENFORCE_EQ(bias_dim[0],
                          C,
                          common::errors::InvalidArgument(
                              "ShapeError: the shape of bias must equal to [%d]"
                              "But received: the shape of bias is [%d]",
                              C,
                              bias_dim[0]));
      }
    }
  }
  y->set_dims(x_dims);
  y->share_lod(x);
  y->set_dtype(x.dtype());
  y->set_layout(x.layout());
  phi::DataType x_dtype = x.dtype();
  phi::DataType param_type =
      (x_dtype == phi::DataType::BFLOAT16 || x_dtype == phi::DataType::FLOAT16)
          ? phi::DataType::FLOAT32
          : x_dtype;
  if (saved_mean) {
    saved_mean->set_dims({NxC});
    saved_mean->set_dtype(param_type);
  }
  if (saved_variance) {
    saved_variance->set_dims({NxC});
    saved_variance->set_dtype(param_type);
  }
}

void FasterTokenizerInferMeta(const MetaTensor& vocab,
                              const MetaTensor& text,
                              const MetaTensor& text_pair,
                              bool do_lower_case,
                              bool is_split_into_words,
                              int max_seq_len,
                              bool pad_to_max_seq_len,
                              MetaTensor* input_ids,
                              MetaTensor* segment_ids,
                              MetaConfig config) {
  input_ids->set_dims({-1, -1});
  segment_ids->set_dims({-1, -1});
  input_ids->set_dtype(phi::DataType::INT64);
  segment_ids->set_dtype(phi::DataType::INT64);
}

void GlobalGatherInferMeta(const MetaTensor& x,
                           const MetaTensor& local_count,
                           const MetaTensor& global_count,
                           MetaTensor* out) {
  auto input_dims = x.dims();
  auto ndim_input = input_dims.size();
  // dim check
  PADDLE_ENFORCE_EQ(
      ndim_input,
      2,
      common::errors::InvalidArgument("The input tensor's dimension must be 2. "
                                      "But received input's dimension = %d.",
                                      ndim_input));
  phi::DDim out_dims = common::make_ddim({-1, -1});
  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
}

void GlobalScatterInferMeta(const MetaTensor& x,
                            const MetaTensor& local_count,
                            const MetaTensor& global_count,
                            MetaTensor* out) {
  auto input_dims = x.dims();
  auto ndim_input = input_dims.size();
  // dim check
  PADDLE_ENFORCE_EQ(
      ndim_input,
      2,
      common::errors::InvalidArgument("The input tensor's dimension must be 2. "
                                      "But received input's dimension = %d.",
                                      ndim_input));

  phi::DDim out_dims = common::make_ddim({-1, -1});
  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
}

void AddGroupNormSiluInferMeta(const MetaTensor& x,
                               const MetaTensor& residual,
                               const MetaTensor& scale,
                               const MetaTensor& bias,
                               float epsilon,
                               int groups,
                               const std::string& data_layout_str,
                               const std::string& activation,
                               MetaTensor* y,
                               MetaTensor* residual_out,
                               MetaTensor* mean,
                               MetaTensor* variance) {
  PADDLE_ENFORCE_NE(y,
                    nullptr,
                    common::errors::InvalidArgument(
                        "The y in GroupNormInferMeta can't be nullptr."));
  PADDLE_ENFORCE_NE(mean,
                    nullptr,
                    common::errors::InvalidArgument(
                        "The mean in GroupNormInferMeta can't be nullptr."));
  PADDLE_ENFORCE_NE(
      variance,
      nullptr,
      common::errors::InvalidArgument(
          "The variance in GroupNormInferMeta can't be nullptr."));

  auto x_dim = x.dims();
  PADDLE_ENFORCE_GE(
      x_dim.size(),
      2,
      common::errors::InvalidArgument(
          "The Input(X)'s dimension of Op(group_norm) must be "
          "greater than 1. But received: %u-D Tensor, which shape is [%s].",
          x_dim.size(),
          x_dim));

  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
  const int64_t channel_num =
      (data_layout == DataLayout::kNCHW ? x_dim[1] : x_dim[x_dim.size() - 1]);
  auto batch_size = x_dim[0];
  PADDLE_ENFORCE_LE(
      groups,
      channel_num,
      common::errors::InvalidArgument(
          "The Attr(groups) of Op(group_norm) must be less than or "
          "equal to the number of channels. But received: groups "
          "is [%s], channels is [%s], the Attr(data_layout) "
          "is [%s]. The error may come from wrong data_layout setting.",
          groups,
          channel_num,
          data_layout_str));
  PADDLE_ENFORCE_GE(
      groups,
      1,
      common::errors::InvalidArgument(
          "The Attr(groups) of Op(group_norm) must be "
          "greater than or equal to 1. But received: groups is [%s].",
          groups));
  PADDLE_ENFORCE_EQ(
      channel_num % groups,
      0,
      common::errors::InvalidArgument(
          "Expected number of channels in input to be divisible by "
          "num_groups, but got input channel is %d and num_groups is %d",
          channel_num,
          groups));

  if (scale) {
    PADDLE_ENFORCE_EQ(
        scale.dims().size(),
        1UL,
        common::errors::InvalidArgument(
            "The Input(Scale) of Op(group_norm) should be 1-D Tensor. "
            "But received: %u-D Tensor, the shape of Input(Scale) is [%s].",
            scale.dims().size(),
            scale.dims()));
    PADDLE_ENFORCE_EQ(
        scale.dims()[0],
        channel_num,
        common::errors::InvalidArgument(
            "The Input(Scale)'s first dimension size of Op(group_norm) must "
            "be equal to the number of channels. But received: the "
            "Input(Scale)'s first dimension size is [%s], the channels is "
            "[%s], the Attr(data_layout) is [%s]. The error may come "
            "from wrong data_layout setting.",
            scale.dims()[0],
            channel_num,
            data_layout_str));
  }
  if (bias) {
    PADDLE_ENFORCE_EQ(
        bias.dims().size(),
        1UL,
        common::errors::InvalidArgument(
            "The Input(Bias) of Op(group_norm) should be 1-D Tensor. "
            "But received: %u-D Tensor, the shape of Input(Bias) is [%s].",
            bias.dims().size(),
            bias.dims()));
    PADDLE_ENFORCE_EQ(
        bias.dims()[0],
        channel_num,
        common::errors::InvalidArgument(
            "The Input(Bias)'s first dimension size of "
            "Op(group_norm) must be equal to the number of channels. "
            "But received: the Input(Bias)'s first dimension size is [%s], "
            "the channels is [%s], the Attr(data_layout) is [%s]. The "
            "error may come from wrong data_layout setting.",
            bias.dims()[0],
            channel_num,
            data_layout_str));
  }
  y->set_dims(x_dim);
  y->set_dtype(x.dtype());
  y->share_lod(x);

  phi::DataType x_dtype = x.dtype();
  phi::DataType param_type =
      (x_dtype == phi::DataType::BFLOAT16 || x_dtype == phi::DataType::FLOAT16)
          ? phi::DataType::FLOAT32
          : x_dtype;
  if (mean) {
    mean->set_dims({batch_size, groups});
    mean->set_dtype(param_type);
  }
  if (variance) {
    variance->set_dims({batch_size, groups});
    variance->set_dtype(param_type);
  }
  if (residual_out) {
    residual_out->set_dims(x_dim);
    residual_out->set_dtype(x.dtype());
    residual_out->share_lod(x);
  }
}

void GroupNormInferMeta(const MetaTensor& x,
                        const MetaTensor& scale,
                        const MetaTensor& bias,
                        float epsilon,
                        int groups,
                        const std::string& data_layout_str,
                        MetaTensor* y,
                        MetaTensor* mean,
                        MetaTensor* variance,
                        MetaConfig config) {
  PADDLE_ENFORCE_NE(y,
                    nullptr,
                    common::errors::InvalidArgument(
                        "The y in GroupNormInferMeta can't be nullptr."));
  PADDLE_ENFORCE_NE(mean,
                    nullptr,
                    common::errors::InvalidArgument(
                        "The mean in GroupNormInferMeta can't be nullptr."));
  PADDLE_ENFORCE_NE(
      variance,
      nullptr,
      common::errors::InvalidArgument(
          "The variance in GroupNormInferMeta can't be nullptr."));

  auto x_dim = x.dims();
  PADDLE_ENFORCE_GE(
      x_dim.size(),
      2,
      common::errors::InvalidArgument(
          "The Input(X)'s dimension of Op(group_norm) must be "
          "greater than 1. But received: %u-D Tensor, which shape is [%s].",
          x_dim.size(),
          x_dim));

  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
  const int64_t channel_num =
      (data_layout == DataLayout::kNCHW ? x_dim[1] : x_dim[x_dim.size() - 1]);
  auto batch_size = x_dim[0];
  bool need_check = channel_num != -1 || config.is_runtime;
  if (need_check) {
    PADDLE_ENFORCE_LE(
        groups,
        channel_num,
        common::errors::InvalidArgument(
            "The Attr(groups) of Op(group_norm) must be less than or "
            "equal to the number of channels. But received: groups "
            "is [%s], channels is [%s], the Attr(data_layout) "
            "is [%s]. The error may come from wrong data_layout setting.",
            groups,
            channel_num,
            data_layout_str));
    PADDLE_ENFORCE_GE(
        groups,
        1,
        common::errors::InvalidArgument(
            "The Attr(groups) of Op(group_norm) must be "
            "greater than or equal to 1. But received: groups is [%s].",
            groups));
    PADDLE_ENFORCE_EQ(
        channel_num % groups,
        0,
        common::errors::InvalidArgument(
            "Expected number of channels in input to be divisible by "
            "num_groups, but got input channel is %d and num_groups is %d",
            channel_num,
            groups));
  }

  if (scale && need_check) {
    PADDLE_ENFORCE_EQ(
        scale.dims().size(),
        1UL,
        common::errors::InvalidArgument(
            "The Input(Scale) of Op(group_norm) should be 1-D Tensor. "
            "But received: %u-D Tensor, the shape of Input(Scale) is [%s].",
            scale.dims().size(),
            scale.dims()));
    PADDLE_ENFORCE_EQ(
        scale.dims()[0],
        channel_num,
        common::errors::InvalidArgument(
            "The Input(Scale)'s first dimension size of Op(group_norm) must "
            "be equal to the number of channels. But received: the "
            "Input(Scale)'s first dimension size is [%s], the channels is "
            "[%s], the Attr(data_layout) is [%s]. The error may come "
            "from wrong data_layout setting.",
            scale.dims()[0],
            channel_num,
            data_layout_str));
  }
  if (bias && need_check) {
    PADDLE_ENFORCE_EQ(
        bias.dims().size(),
        1UL,
        common::errors::InvalidArgument(
            "The Input(Bias) of Op(group_norm) should be 1-D Tensor. "
            "But received: %u-D Tensor, the shape of Input(Bias) is [%s].",
            bias.dims().size(),
            bias.dims()));
    PADDLE_ENFORCE_EQ(
        bias.dims()[0],
        channel_num,
        common::errors::InvalidArgument(
            "The Input(Bias)'s first dimension size of "
            "Op(group_norm) must be equal to the number of channels. "
            "But received: the Input(Bias)'s first dimension size is [%s], "
            "the channels is [%s], the Attr(data_layout) is [%s]. The "
            "error may come from wrong data_layout setting.",
            bias.dims()[0],
            channel_num,
            data_layout_str));
  }
  DDim output_dims = x_dim;
  int64_t weight_channel = data_layout == DataLayout::kNCHW
                               ? output_dims[1]
                               : output_dims[x_dim.size() - 1];
  bool need_update = weight_channel < 0;
  if (weight_channel < 0 && scale) {
    weight_channel = scale.dims()[0];
  } else if (weight_channel < 0 && bias) {
    weight_channel = bias.dims()[0];
  }
  if (need_update && weight_channel > 0) {
    if (data_layout == DataLayout::kNCHW) {
      output_dims[1] = weight_channel;
    } else {
      output_dims[x_dim.size() - 1] = weight_channel;
    }
  }

  y->set_dims(output_dims);
  y->set_dtype(x.dtype());
  y->share_lod(x);

  phi::DataType x_dtype = x.dtype();
  phi::DataType param_type =
      (x_dtype == phi::DataType::BFLOAT16 || x_dtype == phi::DataType::FLOAT16)
          ? phi::DataType::FLOAT32
          : x_dtype;
  if (mean) {
    mean->set_dims({batch_size, groups});
    mean->set_dtype(param_type);
  }
  if (variance) {
    variance->set_dims({batch_size, groups});
    variance->set_dtype(param_type);
  }
}

void LayerNormInferMeta(const MetaTensor& x,
                        const MetaTensor& scale,
                        const MetaTensor& bias,
                        float epsilon,
                        int begin_norm_axis,
                        MetaTensor* out,
                        MetaTensor* mean,
                        MetaTensor* variance,
                        MetaConfig config) {
  auto x_dim = x.dims();
  PADDLE_ENFORCE_GT(begin_norm_axis,
                    0,
                    common::errors::InvalidArgument(
                        "'begin_norm_axis' in Op(LayerNorm) should be "
                        "greater than zero. But received [%d].",
                        begin_norm_axis));
  PADDLE_ENFORCE_LT(
      begin_norm_axis,
      x_dim.size(),
      common::errors::InvalidArgument(
          "'begin_norm_axis' must be less than the dimensions of X,"
          "But received 'begin_norm_axis' is [%d],"
          "received the dimensions of X is [%d].",
          begin_norm_axis,
          x_dim.size()));

  auto matrix_dim = common::flatten_to_2d(x_dim, begin_norm_axis);

  // keep the axis size before normalization for shape of variance and mean
  auto before_norm_dims = slice_ddim(x_dim, 0, begin_norm_axis);
  // int64_t left = matrix_dim[0];
  int64_t right = matrix_dim[1];
  if (scale) {
    PADDLE_ENFORCE_EQ(scale.dims().size(),
                      1,
                      common::errors::InvalidArgument(
                          "The dimensions of Input(Scale) must be 1, but "
                          "received dimensions of "
                          "Input(Scale) is [%d]",
                          scale.dims().size()));
  }

  if (config.is_runtime && scale) {
    PADDLE_ENFORCE_EQ(
        scale.dims()[0],
        right,
        common::errors::InvalidArgument(
            "The first dimension value of Input(Scale) must equal to be the "
            "second dimension value of the flattened 2D matrix of Input(X), "
            "But received the first dimension value of Input(Scale) is "
            "[%d], the second dimension value of the flattened 2D matrix of "
            " Input(Scale) is [%d].",
            scale.dims()[0],
            right));
  }
  if (bias) {
    PADDLE_ENFORCE_EQ(bias.dims().size(),
                      1,
                      common::errors::InvalidArgument(
                          "The dimensions of Input(Bias) must be 1, but "
                          "received dimensions of "
                          "Input(Bias) is [%d]",
                          bias.dims().size()));
  }
  if (config.is_runtime && bias) {
    PADDLE_ENFORCE_EQ(
        bias.dims()[0],
        right,
        common::errors::InvalidArgument(
            "The first dimension value of Input(Bias) must equal to be the "
            "second dimension value of the flattened 2D matrix of Input(X), "
            "But received the first dimension value of Input(Bias) is "
            "[%d], the second dimension value of the flattened 2D matrix of "
            " Input(Bias) is [%d].",
            bias.dims()[0],
            right));
  }

  PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f,
                    true,
                    common::errors::InvalidArgument(
                        "'epsilon' in Op(LayerNorm) should be between"
                        "0.0 and 0.001, But received [%s].",
                        epsilon));

  phi::DataType x_dtype = x.dtype();
  out->set_dims(x_dim);
  out->set_dtype(x_dtype);
  out->share_lod(x);

  phi::DataType param_type =
      (x_dtype == phi::DataType::BFLOAT16 || x_dtype == phi::DataType::FLOAT16)
          ? phi::DataType::FLOAT32
          : x_dtype;
  if (mean) {
    mean->set_dims({before_norm_dims});
    mean->set_dtype(param_type);
  }
  if (variance) {
    variance->set_dims({before_norm_dims});
    variance->set_dtype(param_type);
  }
}

void LayerNormGradInferMeta(const MetaTensor& x,
                            const MetaTensor& y,
                            const MetaTensor& z,
                            MetaTensor* dx,
                            MetaTensor* dy,
                            MetaTensor* dz) {
  if (dx) {
    dx->share_meta(x);
  }
  if (dy && y) {
    dy->share_meta(y);
  }
  if (dz && z) {
    dz->share_meta(z);
  }
}

void LerpInferMeta(const MetaTensor& x,
                   const MetaTensor& y,
                   const MetaTensor& weight,
                   MetaTensor* out) {
  auto x_dims = x.dims();
  auto y_dims = y.dims();
  auto w_dims = weight.dims();
  DDim out_dims = funcs::GetOutputDimsForDynamicShape(x_dims, y_dims);
  out_dims = funcs::GetOutputDimsForDynamicShape(out_dims, w_dims);
  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
  out->share_lod(x);
}

void LinspaceInferMeta(const MetaTensor& start,
                       const MetaTensor& stop,
                       const MetaTensor& number,
                       DataType dtype,
                       MetaTensor* out) {
  PADDLE_ENFORCE_EQ(
      common::product(start.dims()),
      1,
      common::errors::InvalidArgument("The size of Input(start) should be 1,"
                                      "but got %d.",
                                      common::product(start.dims())));

  PADDLE_ENFORCE_EQ(
      common::product(stop.dims()),
      1,
      common::errors::InvalidArgument("The size of Input(stop) should be 1,"
                                      "but got %d.",
                                      common::product(stop.dims())));

  PADDLE_ENFORCE_EQ(
      common::product(number.dims()),
      1,
      common::errors::InvalidArgument("The size of Input(number) should be 1,"
                                      "but got %d.",
                                      common::product(number.dims())));

  out->set_dims(common::make_ddim({-1}));
  out->set_dtype(dtype);
}

void MatchMatrixTensorInferMeta(const MetaTensor& x,
                                const MetaTensor& y,
                                const MetaTensor& w,
                                int dim_t,
                                MetaTensor* out,
                                MetaTensor* tmp,
                                MetaConfig config) {
  auto x_dims = x.dims();
  PADDLE_ENFORCE_EQ(x_dims.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(X) should be equal to 2, "
                        "but received %d.",
                        x_dims.size()));

  auto y_dims = y.dims();
  PADDLE_ENFORCE_EQ(y_dims.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(Y) should be equal to 2, "
                        "but received %d.",
                        y_dims.size()));

  auto w_dims = w.dims();
  PADDLE_ENFORCE_EQ(w_dims.size(),
                    3,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(W) should be equal to 3, "
                        "but received %d.",
                        w_dims.size()));

  PADDLE_ENFORCE_EQ(
      w_dims[0],
      x_dims[1],
      common::errors::InvalidArgument(
          "The first dimension of Input(W) should be equal to the second "
          "dimension of Input(X). But received the first dimension of Input(W) "
          "is %d, the second dimension of Input(X) is %d.",
          w_dims[0],
          x_dims[1]));
  PADDLE_ENFORCE_EQ(
      w_dims[1],
      dim_t,
      common::errors::InvalidArgument(
          "The second dimension of Input(W) should be equal to 'dim_t', but "
          "received the second dimension of Input(W) is %d, 'dim_t' is %d.",
          w_dims[1],
          dim_t));
  PADDLE_ENFORCE_EQ(
      w_dims[2],
      y_dims[1],
      common::errors::InvalidArgument(
          "The last dimension of Input(W) should be equal to "
          "the second dimension of Input(Y). But received the last dimension "
          "of Input(W) is %d, the second dimension of Input(Y) is %d.",
          w_dims[2],
          y_dims[1]));

  int64_t out_dim_0 = -1;
  int64_t tmp_dim_0 = -1;
  if (!config.is_runtime) {
    out->share_lod(x);
    std::vector<int64_t> out_dims_vec{out_dim_0};
    out_dims_vec.push_back(1);
    std::vector<int64_t> tmp_dims_vec{tmp_dim_0};
    tmp_dims_vec.push_back(1);
    out->set_dims(common::make_ddim(out_dims_vec));
    out->set_dtype(x.dtype());
    tmp->set_dims(common::make_ddim(tmp_dims_vec));
    tmp->set_dtype(x.dtype());
  }
}

void MatrixRankAtolRtolInferMeta(const MetaTensor& x,
                                 const MetaTensor& atol,
                                 const MetaTensor& rtol,
                                 bool hermitian,
                                 MetaTensor* out) {
  if (x.numel() == 0) {
    auto dim_x = x.dims();
    PADDLE_ENFORCE_GE(dim_x.size(),
                      2,
                      common::errors::InvalidArgument(
                          "The dims of input must be greater than 2"));

    DDim dim_x_batch = detail::CheckAndGetOutputDim(dim_x);
    out->set_dims(dim_x_batch);
    out->share_lod(x);
    return;
  }
  MatrixRankTolInferMeta(x, atol, true, hermitian, out);
}

void MultiClassNMSInferMeta(const MetaTensor& bboxes,
                            const MetaTensor& scores,
                            const MetaTensor& rois_num,
                            float score_threshold,
                            int nms_top_k,
                            int keep_top_k,
                            float nms_threshold,
                            bool normalized,
                            float nms_eta,
                            int background_label,
                            MetaTensor* out,
                            MetaTensor* index,
                            MetaTensor* nms_rois_num,
                            MetaConfig config) {
  auto box_dims = bboxes.dims();
  auto score_dims = scores.dims();
  auto score_size = score_dims.size();

  if (config.is_runtime) {
    PADDLE_ENFORCE_EQ(
        score_size == 2 || score_size == 3,
        true,
        errors::InvalidArgument("The rank of Input(Scores) must be 2 or 3"
                                ". But received rank = %d",
                                score_size));
    PADDLE_ENFORCE_EQ(
        box_dims.size(),
        3,
        errors::InvalidArgument("The rank of Input(BBoxes) must be 3"
                                ". But received rank = %d",
                                box_dims.size()));
    if (score_size == 3) {
      PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 ||
                            box_dims[2] == 16 || box_dims[2] == 24 ||
                            box_dims[2] == 32,
                        true,
                        errors::InvalidArgument(
                            "The last dimension of Input"
                            "(BBoxes) must be 4 or 8, "
                            "represents the layout of coordinate "
                            "[xmin, ymin, xmax, ymax] or "
                            "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or "
                            "8 points: [xi, yi] i= 1,2,...,8 or "
                            "12 points: [xi, yi] i= 1,2,...,12 or "
                            "16 points: [xi, yi] i= 1,2,...,16"));
      PADDLE_ENFORCE_EQ(
          box_dims[1],
          score_dims[2],
          errors::InvalidArgument(
              "The 2nd dimension of Input(BBoxes) must be equal to "
              "last dimension of Input(Scores), which represents the "
              "predicted bboxes."
              "But received box_dims[1](%s) != score_dims[2](%s)",
              box_dims[1],
              score_dims[2]));
    } else {
      PADDLE_ENFORCE_EQ(box_dims[2],
                        4,
                        errors::InvalidArgument(
                            "The last dimension of Input"
                            "(BBoxes) must be 4. But received dimension = %d",
                            box_dims[2]));
      PADDLE_ENFORCE_EQ(
          box_dims[1],
          score_dims[1],
          errors::InvalidArgument(
              "The 2nd dimension of Input"
              "(BBoxes) must be equal to the 2nd dimension of Input(Scores). "
              "But received box dimension = %d, score dimension = %d",
              box_dims[1],
              score_dims[1]));
    }
  }
  PADDLE_ENFORCE_NE(out,
                    nullptr,
                    errors::InvalidArgument(
                        "The out in MultiClassNMSInferMeta can't be nullptr."));
  PADDLE_ENFORCE_NE(
      index,
      nullptr,
      errors::InvalidArgument(
          "The index in MultiClassNMSInferMeta can't be nullptr."));
  // Here the box_dims[0] is not the real dimension of output.
  // It will be rewritten in the computing kernel.

  out->set_dims(common::make_ddim({-1, box_dims[2] + 2}));
  out->set_dtype(bboxes.dtype());
  index->set_dims(common::make_ddim({-1, 1}));
  index->set_dtype(DataType::INT32);
  nms_rois_num->set_dims(common::make_ddim({-1}));
  nms_rois_num->set_dtype(DataType::INT32);
}

void MoeCombineInferMeta(const MetaTensor& x,
                         const MetaTensor& combine_weights,
                         const MetaTensor& scatter_index,
                         MetaTensor* y) {
  auto x_dim = x.dims();
  auto combine_weights_shape = combine_weights.dims();
  PADDLE_ENFORCE_EQ(x_dim.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(x) must be 1, but "
                        "received dimensions of "
                        "Input(x) is [%d]",
                        x_dim.size()));
  // maybe there is more conditions here....
  y->set_dims(phi::make_ddim({combine_weights_shape[0], x_dim[1]}));
  y->set_dtype(x.dtype());
}

void MoeCombineNoWeightInferMeta(const MetaTensor& x,
                                 const MetaTensor& combine_weights,
                                 const MetaTensor& scatter_index,
                                 float epsilon,
                                 MetaTensor* y) {
  auto x_dim = x.dims();
  auto scatter_index_dim = scatter_index.dims();
  PADDLE_ENFORCE_EQ(x_dim.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(x) must be 2, but "
                        "received dimensions of Input(x) is [%d]",
                        x_dim.size()));
  PADDLE_ENFORCE_EQ(scatter_index_dim.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(scatter_index) must be 2, but "
                        "received dimensions of Input(scatter_index) is [%d]",
                        scatter_index_dim.size()));
  PADDLE_ENFORCE_EQ(scatter_index.dtype(),
                    phi::DataType::INT32,
                    common::errors::InvalidArgument(
                        "The input scatter_index type should be int32"
                        "But received scatter_index type = %s",
                        scatter_index.dtype()));
  int64_t seqlen = scatter_index_dim[0];
  int64_t k = scatter_index_dim[1];
  int64_t hidden_size = x_dim[1];
  PADDLE_ENFORCE_EQ(x_dim[0],
                    seqlen * k,
                    common::errors::InvalidArgument(
                        "The upper dim of Input(x) [%d] must equal to "
                        "the total size of Input(scatter_index) [%d].",
                        x_dim[0],
                        seqlen * k));
  y->set_dims(phi::make_ddim({seqlen, hidden_size}));
  y->set_dtype(x.dtype());
}

void MoeGateDispatchPartialNoSoftmaxTopKInferMeta(
    const MetaTensor& x,
    const MetaTensor& combine_weights,
    const MetaTensor& expert_id,
    int64_t k,
    int64_t capacity,
    int64_t num_experts,
    bool use_pad,
    int64_t expert_start_index,
    int64_t expert_end_index,
    bool reverse_token_drop,
    MetaTensor* y,
    MetaTensor* combine_weights_out,
    MetaTensor* scatter_index,
    MetaTensor* scatter_index_rev,
    MetaTensor* expert_offset,
    MetaTensor* expert_nums_local) {
  auto x_dims = x.dims();
  PADDLE_ENFORCE_EQ(x_dims.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(x) must be 2, but "
                        "received dimensions of "
                        "Input(x) is [%d]",
                        x_dims.size()));
  auto combine_weights_dims = combine_weights.dims();
  PADDLE_ENFORCE_EQ(
      combine_weights_dims.size(),
      2,
      common::errors::InvalidArgument(
          "The dimensions of Input(combine_weights) must be 2, but "
          "received dimensions of "
          "Input(combine_weights) is [%d]",
          combine_weights_dims.size()));
  PADDLE_ENFORCE_EQ(combine_weights_dims[0],
                    x_dims[0],
                    common::errors::InvalidArgument(
                        "The first dimensions of Input(combine_weights) must "
                        "be equal to the first "
                        "dimension of Input(x), but received "
                        "Input(combine_weights) shape is [%d],"
                        "Input(x) shape is [%d]",
                        combine_weights_dims[0],
                        x_dims[0]));
  PADDLE_ENFORCE_GT(expert_end_index,
                    expert_start_index,
                    common::errors::InvalidArgument(
                        "expert_end_index must be greater than "
                        "expert_start_index, but received "
                        "expert_end_index = %d, expert_start_index = %d",
                        expert_end_index,
                        expert_start_index));
  PADDLE_ENFORCE_EQ(
      combine_weights.dtype(),
      phi::DataType::FLOAT32,
      common::errors::InvalidArgument("The dtype of Input(combine_weights) "
                                      "must be FLOAT32, but received %s",
                                      combine_weights.dtype()));
  PADDLE_ENFORCE_EQ(
      expert_id.dtype(),
      phi::DataType::INT32,
      common::errors::InvalidArgument(
          "The dtype of Input(expert_id) must be INT32, but received %s",
          expert_id.dtype()));
  PADDLE_ENFORCE_GT(k,
                    0,
                    common::errors::InvalidArgument(
                        "k must be greater than 0, but received k = %d", k));
  PADDLE_ENFORCE_GT(
      x_dims[0],
      0,
      common::errors::InvalidArgument(
          "num_rows must be greater than 0, but received num_rows = %d",
          x_dims[0]));
  PADDLE_ENFORCE_GE(num_experts,
                    k,
                    common::errors::InvalidArgument(
                        "num_experts must be greater than or equal to k, but "
                        "received num_experts = %d, k = %d",
                        num_experts,
                        k));
  PADDLE_ENFORCE_EQ(
      !reverse_token_drop || !use_pad,
      true,
      common::errors::InvalidArgument(
          "use_pad must be false when reverse_token_drop is true, but received "
          "use_pad = %d, reverse_token_drop = %d",
          use_pad,
          reverse_token_drop));
  PADDLE_ENFORCE_EQ(
      combine_weights.dtype(),
      phi::DataType::FLOAT32,
      common::errors::InvalidArgument("The dtype of Input(combine_weights) "
                                      "must be FLOAT32, but received %s",
                                      combine_weights.dtype()));
  // int64_t num_experts_diff = expert_end_index - expert_start_index;
  int64_t num_rows = x_dims[0];
  // if (use_pad)
  //   y->set_dims({num_experts_diff * capacity, x_dims[1]}) ;
  y->set_dims({-1, x_dims[1]});
  y->set_dtype(x.dtype());
  scatter_index->set_dims({k, num_rows});
  scatter_index->set_dtype(phi::DataType::INT32);
  scatter_index_rev->set_dims({num_experts * capacity});
  scatter_index_rev->set_dtype(phi::DataType::INT32);
  expert_offset->set_dims({num_experts});
  expert_offset->set_dtype(phi::DataType::INT64);
  expert_nums_local->set_dims({num_experts});
  expert_nums_local->set_dtype(phi::DataType::INT64);
  combine_weights_out->set_dims(combine_weights_dims);
  combine_weights_out->set_dtype(combine_weights.dtype());
  // combine_weights_out->share_meta(combine_weights);
}

void MoeGateDispatchPermuteInferMeta(const MetaTensor& x,
                                     const MetaTensor& gate_logits,
                                     const MetaTensor& corr_bias,
                                     int64_t k,
                                     int64_t capacity,
                                     int64_t world_size,
                                     MetaTensor* y,
                                     MetaTensor* combine_weights,
                                     MetaTensor* scatter_index,
                                     MetaTensor* expert_offset,
                                     MetaTensor* expert_id) {
  auto x_dims = x.dims();
  PADDLE_ENFORCE_EQ(x_dims.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(x) must be 2, but "
                        "received dimensions of "
                        "Input(x) is [%d]",
                        x_dims.size()));
  auto gate_logits_dims = gate_logits.dims();
  PADDLE_ENFORCE_EQ(gate_logits_dims.size(),
                    2,
                    common::errors::InvalidArgument(
                        "The dimensions of Input(gate_logits) must be 2, but "
                        "received dimensions of "
                        "Input(gate_logits) is [%d]",
                        gate_logits_dims.size()));
  PADDLE_ENFORCE_EQ(gate_logits_dims[0],
                    x_dims[0],
                    common::errors::InvalidArgument(
                        "The first dimensions of Input(gate_logits) must be "
                        "equal to the first "
                        "dimension of Input(x), but received "
                        "Input(gate_logits) shape is [%d],"
                        "Input(x) shape is [%d]",
                        gate_logits_dims[0],
                        x_dims[0]));
  PADDLE_ENFORCE_EQ(
      gate_logits_dims[1] % world_size,
      0,
      common::errors::InvalidArgument(
          "The number of experts (the second dimension of Input(gate_logits)) "
          "must be divisible by world_size, but received "
          "num_experts = %d, world_size = %d",
          gate_logits_dims[1],
          world_size));

  PADDLE_ENFORCE_GE(gate_logits_dims[1],
                    k,
                    common::errors::InvalidArgument(
                        "The number of experts ((the second dimension of "
                        "Input(gate_logits))) must be greater than or equal to "
                        "k, but received "
                        "num_experts = %d, k = %d",
                        gate_logits_dims[1],
                        k));

  PADDLE_ENFORCE_EQ(
      gate_logits.dtype(),
      phi::DataType::FLOAT32,
      common::errors::InvalidArgument(
          "The dtype of Input(gate_logits) must be FLOAT32, but received %s",
          gate_logits.dtype()));

  if (corr_bias) {
    auto corr_bias_dims = corr_bias.dims();
    PADDLE_ENFORCE_EQ(
        corr_bias_dims.size(),
        1,
        common::errors::InvalidArgument(
            "The dimensions of Input(corr_bias) must be 1, but received "
            "dimensions of Input(corr_bias) is [%d]",
            corr_bias_dims.size()));
    PADDLE_ENFORCE_EQ(
        corr_bias.dtype(),
        paddle::DataType::FLOAT32,
        common::errors::InvalidArgument(
            "The dtype of Input(corr_bias) must be FLOAT32, but received %s",
            corr_bias.dtype()));
  }
  int64_t num_experts = gate_logits_dims[1];
  int64_t num_local_experts = num_experts / world_size;
  int64_t num_rows = x_dims[0];
  y->set_dims({num_local_experts, world_size, capacity, x_dims[1]});
  y->set_dtype(x.dtype());
  combine_weights->set_dims({num_rows, k});
  combine_weights->set_dtype(phi::DataType::FLOAT32);
  scatter_index->set_dims({k, num_rows});
  scatter_index->set_dtype(phi::DataType::INT32);
  expert_offset->set_dims({num_experts});
  expert_offset->set_dtype(phi::DataType::INT64);
  expert_id->set_dims({num_rows, k});
  expert_id->set_dtype(phi::DataType::INT32);
}

void MoeGateDispatchAndQuantInferMeta(const MetaTensor& x,
                                      const MetaTensor& gate_logits,
                                      const MetaTensor& corr_bias,
                                      const int64_t k,
                                      const int64_t capacity,
                                      const bool use_pad,
                                      const bool use_pow2_scale,
                                      MetaTensor* fp8_out,
                                      MetaTensor* scale,
                                      MetaTensor* combine_weights,
                                      MetaTensor* scatter_index,
                                      MetaTensor* expert_offset,
                                      MetaTensor* expert_id) {
  auto x_dims = x.dims();
  auto gate_logits_dims = gate_logits.dims();

  const int64_t num_rows = x_dims[0];
  const int64_t num_experts = gate_logits_dims[1];

  PADDLE_ENFORCE_EQ(
      x_dims.size(),
      2,
      errors::InvalidArgument("Input x should have 2 dimensions"));

  PADDLE_ENFORCE_EQ(
      gate_logits_dims.size(),
      2,
      errors::InvalidArgument("Input gate_logits should have 2 dimensions"));

  PADDLE_ENFORCE_EQ(
      x_dims[0],
      gate_logits_dims[0],
      errors::InvalidArgument(
          "The 0-th dimension of x [%d] "
          "must match that of the 0-th dimension gate_logits [%d].",
          x_dims[0],
          gate_logits_dims[0]));

  PADDLE_ENFORCE_EQ(gate_logits_dims[1] >= k,
                    true,
                    errors::InvalidArgument(
                        "The 1-th dimension of gate_logits [%d] "
                        "must be greater than or equal to that of k [%d].",
                        gate_logits_dims[1],
                        k));

  PADDLE_ENFORCE_EQ(
      x_dims[1] % 128,
      0,
      common::errors::InvalidArgument("The last dimensions of Input(x) must be "
                                      "divided to tile size, but received "
                                      "Input(x) shape is [%d]",
                                      x_dims[0]));

  PADDLE_ENFORCE_EQ(
      x.dtype(),
      phi::DataType::BFLOAT16,
      common::errors::InvalidArgument(
          "The dtype of Input(x) must be BFLOAT16, but received %s",
          x.dtype()));

  if (corr_bias) {
    auto corr_bias_dims = corr_bias.dims();
    PADDLE_ENFORCE_EQ(
        corr_bias_dims.size(),
        1,
        common::errors::InvalidArgument(
            "The dimensions of Input(corr_bias) must be 1, but received "
            "dimensions of Input(corr_bias) is [%d]",
            corr_bias_dims.size()));
    PADDLE_ENFORCE_EQ(
        corr_bias.dtype(),
        paddle::DataType::FLOAT32,
        common::errors::InvalidArgument(
            "The dtype of Input(corr_bias) must be FLOAT32, but received %s",
            corr_bias.dtype()));
  }
  std::vector<int64_t> fp8_out_dims;
  std::vector<int64_t> scale_dims;
  if (use_pad) {
    fp8_out_dims = {num_experts * capacity, x_dims[1]};
    scale_dims = {num_experts * capacity, x_dims[1] / 128};
  } else {
    fp8_out_dims = {num_rows * k, x_dims[1]};
    scale_dims = {num_rows * k, x_dims[1] / 128};
  }

  fp8_out->set_dims(common::make_ddim(fp8_out_dims));
  fp8_out->set_dtype(paddle::DataType::FLOAT8_E4M3FN);

  scale->set_dims(common::make_ddim(scale_dims));
  scale->set_dtype(paddle::DataType::FLOAT32);

  combine_weights->set_dims(common::make_ddim({num_rows, k}));
  combine_weights->set_dtype(phi::DataType::FLOAT32);

  scatter_index->set_dims(common::make_ddim({k, num_rows}));
  scatter_index->set_dtype(phi::DataType::INT32);

  expert_offset->set_dims(common::make_ddim({num_experts}));
  expert_offset->set_dtype(phi::DataType::INT64);

  expert_id->set_dims(common::make_ddim({num_rows, k}));
  expert_id->set_dtype(phi::DataType::INT32);
}

void MovingAverageAbsMaxScaleInferMeta(const MetaTensor& x,
                                       const MetaTensor& in_accum,
                                       const MetaTensor& in_state,
                                       MetaTensor* out,
                                       MetaTensor* out_scale,
                                       MetaTensor* out_state,
                                       MetaTensor* out_accum) {
  if (out) {
    out->set_dims(x.dims());
    out->share_lod(x);
    out_scale->set_dims({1});
  }
  if (out_state) {
    out_state->set_dims(in_state.dims());
  }
  if (out_accum) {
    out_accum->set_dims(in_accum.dims());
  }
}

void NllLossRawInferMeta(const MetaTensor& input,
                         const MetaTensor& label,
                         const MetaTensor& weight,
                         int64_t ignore_index,
                         const std::string& reduction,
                         MetaTensor* out,
                         MetaTensor* total_weight,
                         MetaConfig config) {
  auto x_dims = input.dims();
  auto label_dims = label.dims();
  PADDLE_ENFORCE_EQ(x_dims.size() == 2 || x_dims.size() == 4,
                    true,
                    common::errors::InvalidArgument(
                        "The tensor rank of Input(X) must be 2 or 4."));
  bool contain_unknown_dim = common::contain_unknown_dim(x_dims) ||
                             common::contain_unknown_dim(label_dims);
  bool check = config.is_runtime || !contain_unknown_dim;
  if (check) {
    PADDLE_ENFORCE_EQ(
        x_dims[0],
        label_dims[0],
        common::errors::InvalidArgument(
            "ShapeError: Expected input batch_size to match label batch_size,"
            "But received: the Input(x) batch_size is [%s], the Input(label) "
            " batch_size is [%s].",
            x_dims[0],
            label_dims[0]));
    if (weight) {
      auto w_dims = weight.dims();
      PADDLE_ENFORCE_EQ(w_dims.size(),
                        1,
                        common::errors::InvalidArgument(
                            "Input(Weight) should be a 1D tensor."));
      PADDLE_ENFORCE_EQ(
          x_dims[1],
          w_dims[0],
          common::errors::InvalidArgument(
              "Expected input tensor Weight's size should equal "
              "to the first dimension of the input tensor X. But received "
              "Weight's "
              "size is %d, the first dimension of input X is %d",
              w_dims[0],
              x_dims[1]));
    }
  }
  if (x_dims.size() == 2) {
    if (reduction == "none") {
      out->set_dims({x_dims[0]});
    } else {
      out->set_dims(common::make_ddim({}));
    }
  } else if (x_dims.size() == 4) {
    PADDLE_ENFORCE_EQ(label_dims.size(),
                      3,
                      common::errors::InvalidArgument(
                          "Expected Input(Label) dimensions=3, received %d.",
                          label_dims.size()));
    auto input0 = x_dims[0];
    auto input2 = x_dims[2];
    auto input3 = x_dims[3];
    auto label0 = label_dims[0];
    auto label1 = label_dims[1];
    auto label2 = label_dims[2];
    PADDLE_ENFORCE_EQ(
        input0 == label0 && input2 == label1 && input3 == label2,
        true,
        common::errors::InvalidArgument("Input(X) tensor shape should "
                                        "match to Input(Label) tensor "
                                        "shape."));
    if (reduction == "none") {
      out->set_dims({x_dims[0], x_dims[2], x_dims[3]});
    } else {
      out->set_dims(common::make_ddim({}));
    }
  }
  total_weight->set_dims(common::make_ddim({}));
  out->set_dtype(input.dtype());
  total_weight->set_dtype(input.dtype());
}

void PutAlongAxisInferMeta(const MetaTensor& x,
                           const MetaTensor& index,
                           const MetaTensor& value,
                           int axis,
                           const std::string& reduce,
                           MetaTensor* out) {
  out->set_dims(x.dims());
  out->set_dtype(x.dtype());
}

void PushGpupsSparseInferMeta(const std::vector<const MetaTensor*>& ids,
                              const std::vector<const MetaTensor*>& out,
                              const std::vector<int>& size,
                              bool is_sparse,
                              bool is_distributed,
                              std::vector<MetaTensor*> out_grad) {}

void RandomRoutingInferMeta(const MetaTensor& prob,
                            const MetaTensor& topk_value,
                            const MetaTensor& topk_idx,
                            MetaTensor* out) {
  // check dims
  auto topk_val_dims = topk_value.dims();
  auto prob_dims = prob.dims();
  auto topk_idx_dims = topk_idx.dims();

  PADDLE_ENFORCE_EQ(prob_dims[0],
                    topk_val_dims[0],
                    common::errors::InvalidArgument(
                        "Output(Out) of ScatterNdAddOp should not be null."));

  PADDLE_ENFORCE_EQ(topk_idx_dims[1],
                    topk_val_dims[1],
                    common::errors::InvalidArgument(
                        "Output(Out) of ScatterNdAddOp should not be null."));

  PADDLE_ENFORCE_EQ(topk_idx_dims[0],
                    topk_val_dims[0],
                    common::errors::InvalidArgument(
                        "Output(Out) of ScatterNdAddOp should not be null."));

  out->set_dims(topk_idx_dims);
  out->set_dtype(topk_idx.dtype());
  out->share_lod(topk_idx);
}

void RankAttentionInferMeta(const MetaTensor& x,
                            const MetaTensor& rank_offset,
                            const MetaTensor& rank_param,
                            int max_rank,
                            int max_size,
                            MetaTensor* input_help,
                            MetaTensor* out,
                            MetaTensor* ins_rank) {
  auto x_dims = x.dims();
  auto ins_num = x_dims[0];
  auto param_dims = rank_param.dims();
  auto para_col = param_dims[1];
  auto rank_offset_dims = rank_offset.dims();
  auto x_fea_dim = x_dims[1];
  auto block_matrix_row = max_rank * x_fea_dim;

  PADDLE_ENFORCE_EQ(
      (rank_offset_dims[1] - 1) / 2,
      max_rank,
      common::errors::InvalidArgument("Input(RankOffset) has wrong columns, "
                                      "except columns to be %d, but got %d",
                                      max_rank,
                                      (rank_offset_dims[1] - 1) / 2));

  std::vector<int64_t> out_dims({ins_num, para_col});
  out->set_dims(common::make_ddim(out_dims));
  out->set_dtype(x.dtype());

  std::vector<int64_t> input_help_dims({ins_num, block_matrix_row});
  input_help->set_dims(common::make_ddim(input_help_dims));
  input_help->set_dtype(x.dtype());

  std::vector<int64_t> ins_rank_dims({ins_num, 1});
  ins_rank->set_dims(common::make_ddim(ins_rank_dims));
  ins_rank->set_dtype(x.dtype());

  out->share_lod(x);
}

void RoiAlignInferMeta(const MetaTensor& x,
                       const MetaTensor& boxes,
                       const MetaTensor& boxes_num,
                       int pooled_height,
                       int pooled_width,
                       float spatial_scale,
                       int sampling_ratio,
                       bool aligned,
                       MetaTensor* out,
                       MetaConfig config) {
  auto input_dims = x.dims();
  auto boxes_dims = boxes.dims();

  if (boxes_num) {
    auto boxes_num_dims = boxes_num.dims();
    PADDLE_ENFORCE_EQ(
        boxes_num_dims.size(),
        1,
        common::errors::InvalidArgument("The size of boxes_num should be 1"
                                        ", but received size = %d",
                                        boxes_num_dims.size()));
  }
  PADDLE_ENFORCE_EQ(input_dims.size(),
                    4,
                    common::errors::InvalidArgument(
                        "The format of Input(x) in "
                        "RoiAlignOp is NCHW. And the rank of input must be 4. "
                        "But received rank = %d",
                        input_dims.size()));
  PADDLE_ENFORCE_EQ(
      boxes_dims.size(),
      2,
      common::errors::InvalidArgument("The rank of Input(boxes) "
                                      "in RoiAlignOp should be 2. "
                                      "But the rank of boxes is %d",
                                      boxes_dims.size()));
  if (config.is_runtime) {
    PADDLE_ENFORCE_EQ(boxes_dims[1],
                      4,
                      common::errors::InvalidArgument(
                          "The second dimension "
                          "of Input(boxes) should be 4. But received the "
                          "dimension = %d",
                          boxes_dims[1]));
  }

  PADDLE_ENFORCE_GT(pooled_height,
                    0,
                    common::errors::InvalidArgument(
                        "The 'pooled_height' attribute in RoiAlignOp is "
                        "invalid. The height must be greater than 0. But "
                        "received 'pooled_height' = %d",
                        pooled_height));
  PADDLE_ENFORCE_GT(pooled_width,
                    0,
                    common::errors::InvalidArgument(
                        "The 'pooled_width' attribute in RoiAlignOp is "
                        "invalid. The width must be greater than 0. But "
                        "received 'pooled_width' = %d",
                        pooled_width));
  PADDLE_ENFORCE_GT(spatial_scale,
                    0.0f,
                    common::errors::InvalidArgument(
                        "The 'spatial_scale' attribute in RoiAlignOp is "
                        "invalid. The scale must be greater than 0. But "
                        "received 'spatial_scale' = %f",
                        spatial_scale));

  auto out_dims = input_dims;
  out_dims[0] = boxes_dims[0];
  out_dims[1] = input_dims[1];
  out_dims[2] = pooled_height;
  out_dims[3] = pooled_width;

  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
}

void RoiPoolInferMeta(const MetaTensor& x,
                      const MetaTensor& boxes,
                      const MetaTensor& boxes_num,
                      int pooled_height,
                      int pooled_width,
                      float spatial_scale,
                      MetaTensor* out,
                      MetaTensor* arg_max) {
  auto input_dims = x.dims();
  auto boxes_dims = boxes.dims();

  if (boxes_num) {
    auto boxes_num_dims = boxes_num.dims();
    PADDLE_ENFORCE_EQ(boxes_num_dims.size(),
                      1,
                      common::errors::InvalidArgument(
                          "The second dimension of boxes_num should "
                          "be 1, but received dimension is %d",
                          boxes_num_dims.size()));
  }
  PADDLE_ENFORCE_EQ(input_dims.size(),
                    4,
                    common::errors::InvalidArgument(
                        "The input data should be a four-dimensional "
                        "tensor with [N,C,H,W], but received input data with "
                        " %d dimension",
                        input_dims.size()));
  PADDLE_ENFORCE_EQ(
      boxes_dims.size(),
      2,
      common::errors::InvalidArgument(
          "boxes should be a 2-D DenseTensor with shape (num_boxes, 4)"
          "given as [[x1, y1, x2, y2], ...], but received boxes is "
          "%d-dimensional DenseTensor",
          boxes_dims.size()));
  PADDLE_ENFORCE_EQ(
      boxes_dims[1],
      4,
      common::errors::InvalidArgument(
          "boxes should be a 2-D DenseTensor with shape (num_boxes, 4)"
          "given as [[x1, y1, x2, y2], ...]. But the second dimension of  "
          "the received data is %d",
          boxes_dims[1]));

  PADDLE_ENFORCE_GT(pooled_height,
                    0,
                    common::errors::OutOfRange(
                        "The pooled output height must be greater than 0"
                        "but received height is %d",
                        pooled_height));
  PADDLE_ENFORCE_GT(pooled_width,
                    0,
                    common::errors::OutOfRange(
                        "The pooled output width must be greater than 0"
                        "but received width is %d",
                        pooled_width));
  PADDLE_ENFORCE_GT(
      spatial_scale,
      0.0f,
      common::errors::OutOfRange("The spatial scale must be greater than 0, "
                                 "but received spatial scale is %f",
                                 spatial_scale));

  auto out_dims = input_dims;
  out_dims[0] = boxes_dims[0];
  out_dims[1] = input_dims[1];
  out_dims[2] = pooled_height;
  out_dims[3] = pooled_width;

  out->set_dims(out_dims);
  out->set_dtype(x.dtype());
  arg_max->set_dims(out_dims);
  arg_max->set_dtype(DataType::INT64);
}

void ScatterInferMeta(const MetaTensor& x,
                      const MetaTensor& index,
                      const MetaTensor& updates,
                      bool overwrite,
                      MetaTensor* out) {
  const auto& updates_dims = updates.dims();
  const auto& ref_dims = x.dims();
  const auto& index_dims = index.dims();

  if (index_dims.size() == 2) {
    if (index_dims[1] != 0) {
      PADDLE_ENFORCE_EQ(index_dims[1],
                        1,
                        common::errors::InvalidArgument(
                            "The last dim of the index should be 1 when the "
                            "index is a 2D tensor, but we get %d.",
                            index_dims[1]));
    }
  } else {
    PADDLE_ENFORCE_EQ(index_dims.size() == 1 || index_dims.size() == 0,
                      true,
                      common::errors::InvalidArgument(
                          "The index should be a 0D or 1D tensor when the "
                          "index is not a 2D tensor, but we get %d.",
                          index_dims.size()));
  }
  if (index_dims.size() != 0) {
    PADDLE_ENFORCE_EQ(
        (ref_dims.size() == updates_dims.size()),
        true,
        common::errors::InvalidArgument(
            "When the Input(Index) is not a 0D tensor, the "
            "Input(X) and Input(Updates) should have the same shape size, "
            "but received the size of Input(x)'s shape is %d, the size of "
            "Input(Updates)'s shape is %d.",
            ref_dims.size(),
            updates_dims.size()));
    if (index_dims[0] != -1 && updates_dims[0] != -1) {
      PADDLE_ENFORCE_LE(
          index_dims[0],
          updates_dims[0],
          common::errors::InvalidArgument(
              "The first dimension size of Input(Index) should be no greater "
              "than Input(Updates), but received first dimension size of "
              "Input(Index) is %d, Input(Updates) is  %d.",
              index_dims[0],
              updates_dims[0]));
    }
  } else {
    PADDLE_ENFORCE_EQ(
        (ref_dims.size() - 1 == updates_dims.size()),
        true,
        common::errors::InvalidArgument(
            "When the Input(Index) is a 0D tensor, the "
            "Input(Updates) should have the shape size as Input(X)'s "
            "shape size - 1. But received the size of Input(x)'s shape is %d, "
            " the size of Input(Updates)'s shape is %d.",
            ref_dims.size(),
            updates_dims.size()));
  }
  out->set_dims(ref_dims);
  out->share_lod(x);
  out->set_dtype(x.dtype());
}

void ScatterNdAddInferMeta(const MetaTensor& x,
                           const MetaTensor& index,
                           const MetaTensor& updates,
                           MetaTensor* out) {
  const auto& ref_dims = x.dims();
  auto ref_dims_size = ref_dims.size();
  const auto& index_dims = index.dims();
  int index_dims_size = static_cast<int>(index_dims.size());
  const auto& updates_dims = updates.dims();
  auto updates_dims_size = updates_dims.size();

  if (updates_dims_size == 0) {
    // check for 0d updates
    PADDLE_ENFORCE_EQ(
        index_dims_size,
        1,
        common::errors::InvalidArgument("When the updates is a 0d tensor, the "
                                        "index should be a 1d tensor."));
    PADDLE_ENFORCE_EQ(
        index_dims[index_dims_size - 1],
        ref_dims_size,
        common::errors::InvalidArgument(
            "When the update is a 0d tensor, The last dimension of "
            "Input(Index)'s shape should be equal with the rank of Input(X)."));
  } else {
    PADDLE_ENFORCE_LE(
        index_dims[index_dims_size - 1],
        ref_dims_size,
        common::errors::InvalidArgument(
            "The last dimension of Input(Index)'s shape should be no greater "
            "than the rank of Input(X), but received the last dimension of "
            "Input(Index)'s shape is %d, the rank of Input(X) is %d.",
            index_dims[index_dims_size - 1],
            ref_dims_size));
    PADDLE_ENFORCE_GE(index_dims_size,
                      1UL,
                      common::errors::InvalidArgument(
                          "The rank of Input(Index) should be greater than 1, "
                          "but received the rank of Input(Index) is %d.",
                          index_dims_size));

    // update.shape = index.shape[:-1] + output.shape[index.shape[-1]:]
    std::vector<int64_t> r_updates_dims;
    bool without_dynamic_shape = true;
    for (int i = 0; i < index_dims_size - 1; ++i) {
      if (index_dims[i] == -1) {
        without_dynamic_shape = false;
      }
      r_updates_dims.emplace_back(index_dims[i]);
    }
    for (int64_t i = index_dims[index_dims_size - 1]; i < ref_dims_size; ++i) {
      if (ref_dims[i] == -1) {
        without_dynamic_shape = false;
      }
      r_updates_dims.emplace_back(ref_dims[i]);
    }
    // check for non-0d updates
    PADDLE_ENFORCE_EQ(
        r_updates_dims.size(),
        updates_dims_size,
        common::errors::InvalidArgument(
            "Updates has wrong shape. The shape of Updates and "
            "Input(Updates) "
            "should be same, but received the shape of Updates is %d, "
            "the shape of Input(Updates) is %d.",
            r_updates_dims.size(),
            updates_dims_size));
    if (without_dynamic_shape) {
      for (int64_t i = 0; i < updates_dims_size; ++i) {
        PADDLE_ENFORCE_EQ(
            r_updates_dims[i],
            updates_dims[i],
            common::errors::InvalidArgument(
                "Updates has wrong shape. The dimensions of Updates and "
                "Input(Updates) should match, but received Updates's"
                "%d-th dimension is %d, Input(Updates)'s %d-th "
                "dimension is %d.",
                i,
                r_updates_dims[i],
                i,
                updates_dims[i]));
      }
    }
  }
  out->set_dims(ref_dims);
  out->share_lod(x);
  out->set_dtype(x.dtype());
}

void SendURecvInferMeta(const MetaTensor& x,
                        const MetaTensor& src_index,
                        const MetaTensor& dst_index,
                        const std::string& reduce_op,
                        const IntArray& out_size,
                        MetaTensor* out,
                        MetaTensor* dst_count) {
  auto src_index_dims = src_index.dims();
  if (src_index_dims.size() == 2) {
    PADDLE_ENFORCE_EQ(src_index_dims[1],
                      1,
                      common::errors::InvalidArgument(
                          "The last dim of Src_index should be 1 when it "
                          "is 2D, but we get %d",
                          src_index_dims[1]));
  } else {
    PADDLE_ENFORCE_EQ(
        src_index_dims.size(),
        1,
        common::errors::InvalidArgument(
            "The Src_index should be 1D, when it is not 2D, but we get %d",
            src_index_dims.size()));
  }

  auto dst_index_dims = dst_index.dims();
  if (dst_index_dims.size() == 2) {
    PADDLE_ENFORCE_EQ(dst_index_dims[1],
                      1,
                      common::errors::InvalidArgument(
                          "The last dim of Dst_index should be 1 when it "
                          "is 2D, but we get %d",
                          dst_index_dims[1]));
  } else {
    PADDLE_ENFORCE_EQ(
        dst_index_dims.size(),
        1,
        common::errors::InvalidArgument("The Dst_index should be 1D, "
                                        "when it is not 2D, but we get %d",
                                        dst_index_dims.size()));
  }

  if (src_index_dims[0] != 0 && dst_index_dims[0] != 0) {
    PADDLE_ENFORCE_EQ(
        src_index_dims[0],
        dst_index_dims[0],
        common::errors::InvalidArgument(
            "Src_index and Dst_index should have the same shape."));
  }

  auto dims = x.dims();
  std::vector<int64_t> dims_ = common::vectorize(dims);
  dims_[0] = -1;
  out->set_dims(common::make_ddim(dims_));
  out->set_dtype(x.dtype());

  if (reduce_op == "MEAN") {
    dst_count->set_dims({-1});
    dst_count->set_dtype(DataType::INT32);
  }
}

void SequenceConvInferMeta(const MetaTensor& x,
                           const MetaTensor& padding_data,
                           const MetaTensor& filter,
                           int context_length,
                           bool padding_trainable,
                           int context_start,
                           int context_stride,
                           MetaTensor* out) {
  auto in_dims = x.dims();
  auto filter_dims = filter.dims();
  PADDLE_ENFORCE_EQ(
      context_stride,
      1,
      common::errors::InvalidArgument(
          "Currently, SequenceConvOp only supports contextStride=1. But "
          "received contextStride = %u.",
          context_stride));
  PADDLE_ENFORCE_EQ(
      in_dims.size() == 2 && filter_dims.size() == 2,
      true,
      common::errors::InvalidArgument(
          "Input(X, Filter) should be 2-D tensor. But received Input(X): "
          "input rank %u, input shape [%s]; received Input(Filter): "
          "input rank %u, input shape [%s].",
          in_dims.size(),
          in_dims,
          filter_dims.size(),
          filter_dims));
  PADDLE_ENFORCE_EQ(
      filter_dims[0],
      context_length * in_dims[1],
      common::errors::InvalidArgument(
          "Filter's height should be context_length * "
          "input_hidden_size. But received: filter's height = %d, "
          "context_length * input_hidden_size = %d.",
          filter_dims[0],
          context_length * in_dims[1]));

  if (padding_trainable) {
    const phi::DDim& padding_dim = padding_data.dims();
    int up_pad = std::max(0, -context_start);
    int down_pad = std::max(0, context_start + context_length - 1);
    int total_pad = up_pad + down_pad;
    int64_t input_width = in_dims[1];
    bool start_equals_zero = context_start == 0;
    bool length_equals_one = context_length == 1;
    bool start_length = start_equals_zero && length_equals_one;

    PADDLE_ENFORCE_EQ(
        start_length,
        false,
        common::errors::InvalidArgument(
            "If context_start is 0 and context_length is 1, paddingTrainable "
            "should be false."));
    PADDLE_ENFORCE_EQ(
        padding_dim.size(),
        2,
        common::errors::InvalidArgument(
            "Input(PaddingData) should be 2-D tensor. But received: "
            "input rank %u, input shape [%s].",
            padding_dim.size(),
            padding_dim));
    PADDLE_ENFORCE_EQ(
        padding_dim[0] == total_pad && padding_dim[1] == input_width,
        true,
        common::errors::InvalidArgument("Input(PaddingData)'s shape is not "
                                        "consistent with 'context_start' "
                                        "and 'context_length'. Received "
                                        "Input(PaddingData): input rank "
                                        "%u, "
                                        "input shape [%s].",
                                        padding_dim.size(),
                                        padding_dim));
  }

  in_dims[1] = filter_dims[1];
  out->set_dims(in_dims);
  out->share_lod(x);
  out->set_dtype(x.dtype());
}

void SpectralNormInferMeta(const MetaTensor& weight,
                           const MetaTensor& u,
                           const MetaTensor& v,
                           int dim,
                           int power_iters,
                           float eps,
                           MetaTensor* out,
                           MetaConfig config) {
  auto dim_weight = weight.dims();
  auto rank_weight = dim_weight.size();
  PADDLE_ENFORCE_GE(rank_weight,
                    2,
                    errors::InvalidArgument(
                        "The rank of Input(Weights) should be greater equal "
                        "than 2, but received Weight rank(%d)",
                        rank_weight));
  PADDLE_ENFORCE_LE(
      rank_weight,
      5,
      errors::InvalidArgument("The rank of Input(Weights) should be less equal "
                              "than 5, but received Weight rank(%d)",
                              rank_weight));

  auto dim_valid = dim == 0 || dim == 1;
  PADDLE_ENFORCE_EQ(dim_valid,
                    true,
                    errors::InvalidArgument(
                        "Attr(dim) can only be 0 or 1, but received %d", dim));
  PADDLE_ENFORCE_GE(
      power_iters,
      0,
      errors::InvalidArgument(
          "Attr(power_iters) should be greater equal then 0, but received %d",
          power_iters));

  int64_t h = dim_weight[dim];
  int64_t w = 1;
  for (int i = 0; i < rank_weight; i++) {
    if (i != dim) {
      w *= dim_weight[i];
    }
  }
  auto dim_u = u.dims();
  auto dim_v = v.dims();

  if (config.is_runtime || (dim_u[0] > 0 && h > 0)) {
    PADDLE_ENFORCE_EQ(dim_u[0],
                      h,
                      errors::InvalidArgument(
                          "Input(U) dimension[0] should be equal to "
                          "Input(Weight) dimension[Attr(dim)], but received "
                          "U dimension[0](%d) != Weight dimension[%d](%d)",
                          dim_u[0],
                          dim,
                          h));
  }

  if (config.is_runtime || (dim_v[0] > 0 && w > 0)) {
    PADDLE_ENFORCE_EQ(
        dim_v[0],
        w,
        errors::InvalidArgument(
            "Input(V) dimension[0] should be equal to the product of "
            "Input(Weight) dimension except dimension[Attr(dim)], but "
            "received V dimension[0](%d) != product of Input(Weight) "
            "dimension(%d)",
            dim_v[0],
            w));
  }

  if (out) {
    out->set_dims(dim_weight);
    out->set_dtype(weight.dtype());
    out->share_lod(weight);
  }
}

void ViterbiDecodeInferMeta(const MetaTensor& input,
                            const MetaTensor& transition,
                            const MetaTensor& length,
                            bool include_bos_eos_tag,
                            MetaTensor* scores,
                            MetaTensor* path,
                            MetaConfig config) {
  auto in_dims = input.dims();
  PADDLE_ENFORCE_EQ(in_dims.size(),
                    3,
                    common::errors::InvalidArgument(
                        "The rank of Input in ViterbiDecode  must be 3. But "
                        "received Input's rank is %d.",
                        in_dims.size()));
  auto length_dims = length.dims();
  PADDLE_ENFORCE_EQ(length_dims.size(),
                    1,
                    common::errors::InvalidArgument(
                        "The rank of Length in ViterbiDecode must be 1. But "
                        "received Length's rank is %d.",
                        length_dims.size()));
  auto transition_dims = transition.dims();
  PADDLE_ENFORCE_EQ(
      transition_dims.size(),
      2,
      common::errors::InvalidArgument(
          "The rank of Transition in ViterbiDecode must be 2. But "
          "received Transition's rank is %d.",
          transition_dims.size()));
  if (config.is_runtime) {
    PADDLE_ENFORCE_EQ(
        in_dims[0],
        length_dims[0],
        common::errors::InvalidArgument(
            "The batch size of Input and Length should be equal."));
    PADDLE_ENFORCE_EQ(in_dims[2],
                      transition_dims[0],
                      common::errors::InvalidArgument(
                          "The number of tags of Input (%d) and Transition "
                          "(%d) should be equal.",
                          transition_dims[0],
                          in_dims[2]));
  }
  scores->set_dims(length_dims);
  scores->set_dtype(length.dtype());
}

void QuantLinearInferMeta(const MetaTensor& x,
                          const MetaTensor& w,
                          const MetaTensor& bias,
                          int in_num_col_dims,
                          const std::string& activation_type,
                          bool padding_weights,
                          float scale_in,
                          const std::vector<float>& scale_weights,
                          int quant_round_type,
                          float quant_max_bound,
                          float quant_min_bound,
                          MetaTensor* y) {
  auto w_dims = w.dims();
  PADDLE_ENFORCE_EQ(
      w_dims.size(),
      2,
      common::errors::InvalidArgument(
          "The input Weight of quant_linear is expected to be a 2-D tensor. "
          "But received the number of Weight's dimensions is %d, "
          "Weight's shape is %s.",
          w_dims.size(),
          w_dims));
  if (bias) {
    auto bias_dims = bias.dims();
    auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1];

    PADDLE_ENFORCE_LE(bias_dims.size(),
                      2,
                      common::errors::InvalidArgument(
                          "The input Bias of quant_linear is expected to be a "
                          "1-D or 2-D tensor. But "
                          "received the number of Bias's dimensions is %d, "
                          "Bias's shape is %s.",
                          bias_dims.size(),
                          bias_dims));

    PADDLE_ENFORCE_EQ(
        bias_dims[bias_dims.size() - 1],
        w_dims1,
        common::errors::InvalidArgument(
            "The last dimension of input Bias is expected be equal "
            "to the actual width of input Weight. But received the last "
            "dimension of Bias is %d, Bias's shape is %s; "
            "the actual width of Weight is %d, Weight's shape is %s.",
            bias_dims[bias_dims.size() - 1],
            bias_dims,
            w_dims1,
            w_dims));

    if (bias_dims.size() == 2) {
      PADDLE_ENFORCE_EQ(
          bias_dims[0],
          1,
          common::errors::InvalidArgument(
              "The first dimension of input Bias is expected to be 1, "
              "but received %d, Bias's shape is %s.",
              bias_dims[0],
              bias_dims));
    }
  }

  auto in_dims = x.dims();
  PADDLE_ENFORCE_LT(
      in_num_col_dims,
      in_dims.size(),
      common::errors::InvalidArgument(
          "The attribute in_num_col_dims used to flatten Input to "
          "a 2-D tensor, is expected to be less than the number of "
          "Input's dimensions. But received in_num_col_dims is %d, "
          "the number of Input's dimensions is %d, Input's shape is %s.",
          in_num_col_dims,
          in_dims.size(),
          in_dims));

  if (!activation_type.empty()) {
    PADDLE_ENFORCE_EQ(
        activation_type,
        "relu",
        common::errors::InvalidArgument(
            "The attribute activation_type of quant_linear is expected "
            "to be \"relu\", but received %s.",
            activation_type.c_str()));
  }

  std::vector<int64_t> output_dims;

  auto in_mat_dims = common::flatten_to_2d(in_dims, in_num_col_dims);
  auto w_dims0 = padding_weights ? w_dims[0] - 4 : w_dims[0];
  auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1];
  PADDLE_ENFORCE_EQ(
      in_mat_dims[1],
      w_dims0,
      common::errors::InvalidArgument(
          "The input's second dimension and weight's first dimension is "
          "expected to be the same. But received input's second dimension is "
          "%d, input's shape is %s; weight's first dimension is %d, weight's "
          "shape is %s.",
          in_mat_dims[1],
          in_mat_dims,
          w_dims0,
          common::make_ddim({w_dims0, w_dims1})));
  output_dims.reserve(static_cast<size_t>(in_num_col_dims) +
                      static_cast<size_t>(1));
  for (int i = 0; i < in_num_col_dims; ++i) {
    output_dims.push_back(in_dims[i]);
  }
  output_dims.push_back(w_dims1);

  y->set_dims(common::make_ddim(output_dims));
  y->share_lod(x);
  y->set_dtype(x.dtype());
}
void TdmSamplerInferMeta(const MetaTensor& x,
                         const MetaTensor& travel,
                         const MetaTensor& layer,
                         bool output_positive,
                         const std::vector<int>& neg_samples_num_list,
                         const std::vector<int>& layer_offset,
                         int seed,
                         int dtype,
                         MetaTensor* out,
                         MetaTensor* labels,
                         MetaTensor* mask,
                         MetaConfig config) {
  auto neg_samples_num_vec = neg_samples_num_list;
  auto output_positive_flag = output_positive;

  int64_t sample_res_length = 0;
  for (auto sample_nums : neg_samples_num_vec) {
    sample_res_length += sample_nums + (int64_t)output_positive_flag;
  }
  auto ddim = phi::make_ddim({-1, sample_res_length});
  auto input_dims = x.dims();
  if (config.is_runtime) {
    auto output_dims = phi::vectorize(input_dims);
    auto batch_size = output_dims[0];
    out->set_dims(phi::make_ddim({batch_size, sample_res_length}));
    mask->set_dims(phi::make_ddim({batch_size, sample_res_length}));
    if (labels) {
      labels->set_dims(phi::make_ddim({batch_size, sample_res_length}));
    }
  } else {
    out->set_dims(ddim);
    mask->set_dims(ddim);
    if (labels) {
      labels->set_dims(ddim);
    }
  }
  out->set_dtype(x.dtype());
  mask->set_dtype(x.dtype());
  if (labels) {
    labels->set_dtype(x.dtype());
  }
}
}  // namespace phi
